Unverified Commit d4c2cb40 authored by Julien Chaumond, committed by GitHub

Kill model archive maps (#4636)

* Kill model archive maps

* Fixup

* Also kill model_archive_map for MaskedBertPreTrainedModel

* Unhook config_archive_map

* Tokenizers: align with model id changes

* make style && make quality

* Fix CI
parent 47a551d1
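The net effect across the files below: model shortcut names are no longer looked up in hard-coded archive maps; any identifier that is not a local path or URL is resolved directly against huggingface.co. A minimal sketch of that resolution, using the hf_bucket_url and cached_path helpers that appear later in this diff (the model id and the printout are only illustrative):

# Sketch only, not part of the diff: locating a config file after this change.
# hf_bucket_url() builds the hub URL straight from the model id; no archive map is consulted.
from transformers.file_utils import CONFIG_NAME, cached_path, hf_bucket_url

model_id = "flaubert/flaubert_base_cased"                   # namespaced model id (example)
config_url = hf_bucket_url(model_id, filename=CONFIG_NAME)  # URL of the config on the hub
local_config_path = cached_path(config_url)                 # download, or reuse the local cache
print(local_config_path)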
@@ -36,5 +36,4 @@ class CamembertConfig(RobertaConfig):
superclass for the appropriate documentation alongside usage examples.
"""
- pretrained_config_archive_map = CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "camembert"
@@ -27,7 +27,7 @@ CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP = {"ctrl": "https://storage.googleapis.com/sf
class CTRLConfig(PretrainedConfig):
"""
- This is the configuration class to store the configuration of an :class:`~transformers.CTRLModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.CTRLModel`.
It is used to instantiate an CTRL model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the `ctrl <https://huggingface.co/ctrl>`__ architecture from SalesForce.
@@ -76,13 +76,8 @@ class CTRLConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "ctrl"
def __init__(
...
@@ -90,12 +90,7 @@ class DistilBertConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "distilbert"
def __init__(
...
@@ -89,12 +89,7 @@ class ElectraConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "electra"
def __init__(
...
@@ -23,10 +23,10 @@ from .configuration_xlm import XLMConfig
logger = logging.getLogger(__name__)
FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
- "flaubert-small-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/config.json",
- "flaubert-base-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/config.json",
- "flaubert-base-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/config.json",
- "flaubert-large-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/config.json",
+ "flaubert/flaubert_small_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_small_cased/config.json",
+ "flaubert/flaubert_base_uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_uncased/config.json",
+ "flaubert/flaubert_base_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_base_cased/config.json",
+ "flaubert/flaubert_large_cased": "https://s3.amazonaws.com/models.huggingface.co/bert/flaubert/flaubert_large_cased/config.json",
}
@@ -142,7 +142,6 @@ class FlaubertConfig(XLMConfig):
text in a given language.
"""
- pretrained_config_archive_map = FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "flaubert"
def __init__(self, layerdrop=0.0, pre_norm=False, pad_token_id=2, bos_token_id=0, **kwargs):
...
@@ -110,13 +110,8 @@ class GPT2Config(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "gpt2"
def __init__(
...
@@ -33,7 +33,7 @@ LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class LongformerConfig(RobertaConfig):
r"""
- This is the configuration class to store the configuration of an :class:`~transformers.LongformerModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.LongformerModel`.
It is used to instantiate an Longformer model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the RoBERTa `roberta-base <https://huggingface.co/roberta-base>`__ architecture with a sequence length 4,096.
@@ -59,12 +59,7 @@ class LongformerConfig(RobertaConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = LONGFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "longformer"
def __init__(self, attention_window: Union[List[int], int] = 512, sep_token_id: int = 2, **kwargs):
...
@@ -18,10 +18,9 @@ from .configuration_bart import BartConfig
PRETRAINED_CONFIG_ARCHIVE_MAP = {
- "marian-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json",
+ "Helsinki-NLP/opus-mt-en-de": "https://s3.amazonaws.com/models.huggingface.co/bert/Helsinki-NLP/opus-mt-en-de/config.json",
}
class MarianConfig(BartConfig):
model_type = "marian"
- pretrained_config_archive_map = PRETRAINED_CONFIG_ARCHIVE_MAP
@@ -30,7 +30,7 @@ OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class OpenAIGPTConfig(PretrainedConfig):
"""
- This is the configuration class to store the configuration of an :class:`~transformers.OpenAIGPTModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.OpenAIGPTModel`.
It is used to instantiate an GPT model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the `GPT <https://huggingface.co/openai-gpt>`__ architecture from OpenAI.
@@ -108,13 +108,8 @@ class OpenAIGPTConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "openai-gpt"
def __init__(
...
@@ -135,12 +135,7 @@ class ReformerConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = REFORMER_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "reformer"
def __init__(
...
@@ -35,7 +35,7 @@ ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class RobertaConfig(BertConfig):
r"""
- This is the configuration class to store the configuration of an :class:`~transformers.RobertaModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.RobertaModel`.
It is used to instantiate an RoBERTa model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the BERT `bert-base-uncased <https://huggingface.co/bert-base-uncased>`__ architecture.
@@ -59,12 +59,7 @@ class RobertaConfig(BertConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "roberta"
def __init__(self, pad_token_id=1, bos_token_id=0, eos_token_id=2, **kwargs):
...
@@ -59,7 +59,6 @@ class T5Config(PretrainedConfig):
initializer_factor: A factor for initializing all weight matrices (should be kept to 1.0, used for initialization testing).
layer_norm_eps: The epsilon used by LayerNorm.
"""
- pretrained_config_archive_map = T5_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "t5"
def __init__(
...
@@ -30,7 +30,7 @@ TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP = {
class TransfoXLConfig(PretrainedConfig):
"""
- This is the configuration class to store the configuration of an :class:`~transformers.TransfoXLModel`.
+ This is the configuration class to store the configuration of a :class:`~transformers.TransfoXLModel`.
It is used to instantiate a Transformer XL model according to the specified arguments, defining the model
architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
the `Transformer XL <https://huggingface.co/transfo-xl-wt103>`__ architecture.
@@ -110,13 +110,8 @@ class TransfoXLConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "transfo-xl"
def __init__(
...
@@ -20,7 +20,7 @@ import copy
import json
import logging
import os
- from typing import Dict, Optional, Tuple
+ from typing import Dict, Tuple
from .file_utils import CONFIG_NAME, cached_path, hf_bucket_url, is_remote_url
@@ -37,7 +37,6 @@ class PretrainedConfig(object):
It only affects the model's configuration.
Class attributes (overridden by derived classes):
- - ``pretrained_config_archive_map``: a python ``dict`` with `shortcut names` (string) as keys and `url` (string) of associated pretrained model configurations as values.
- ``model_type``: a string that identifies the model type, that we serialize into the JSON file, and that we use to recreate the correct object in :class:`~transformers.AutoConfig`.
Args:
@@ -52,7 +51,6 @@ class PretrainedConfig(object):
torchscript (:obj:`bool`, `optional`, defaults to :obj:`False`):
Is the model used with Torchscript (for PyTorch models).
"""
- pretrained_config_archive_map: Dict[str, str] = {}
model_type: str = ""
def __init__(self, **kwargs):
@@ -204,9 +202,7 @@ class PretrainedConfig(object):
return cls.from_dict(config_dict, **kwargs)
@classmethod
- def get_config_dict(
- cls, pretrained_model_name_or_path: str, pretrained_config_archive_map: Optional[Dict] = None, **kwargs
- ) -> Tuple[Dict, Dict]:
+ def get_config_dict(cls, pretrained_model_name_or_path: str, **kwargs) -> Tuple[Dict, Dict]:
"""
From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used
for instantiating a Config using `from_dict`.
@@ -214,8 +210,6 @@ class PretrainedConfig(object):
Parameters:
pretrained_model_name_or_path (:obj:`string`):
The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.
- pretrained_config_archive_map: (:obj:`Dict[str, str]`, `optional`) Dict:
- A map of `shortcut names` to `url`. By default, will use the current class attribute.
Returns:
:obj:`Tuple[Dict, Dict]`: The dictionary that will be used to instantiate the configuration object.
@@ -227,12 +221,7 @@ class PretrainedConfig(object):
proxies = kwargs.pop("proxies", None)
local_files_only = kwargs.pop("local_files_only", False)
- if pretrained_config_archive_map is None:
- pretrained_config_archive_map = cls.pretrained_config_archive_map
- if pretrained_model_name_or_path in pretrained_config_archive_map:
- config_file = pretrained_config_archive_map[pretrained_model_name_or_path]
- elif os.path.isdir(pretrained_model_name_or_path):
+ if os.path.isdir(pretrained_model_name_or_path):
config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME)
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
config_file = pretrained_model_name_or_path
@@ -255,21 +244,11 @@ class PretrainedConfig(object):
config_dict = cls._dict_from_json_file(resolved_config_file)
except EnvironmentError:
- if pretrained_model_name_or_path in pretrained_config_archive_map:
- msg = "Couldn't reach server at '{}' to download pretrained model configuration file.".format(
- config_file
- )
- else:
- msg = (
- "Can't load '{}'. Make sure that:\n\n"
- "- '{}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
- "- or '{}' is the correct path to a directory containing a '{}' file\n\n".format(
- pretrained_model_name_or_path,
- pretrained_model_name_or_path,
- pretrained_model_name_or_path,
- CONFIG_NAME,
- )
- )
+ msg = (
+ f"Can't load config for '{pretrained_model_name_or_path}'. Make sure that:\n\n"
+ f"- '{pretrained_model_name_or_path}' is a correct model identifier listed on 'https://huggingface.co/models'\n\n"
+ f"- or '{pretrained_model_name_or_path}' is the correct path to a directory containing a {CONFIG_NAME} file\n\n"
+ )
raise EnvironmentError(msg)
except json.JSONDecodeError:
...
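In plain terms, get_config_dict now takes only the model identifier plus keyword arguments: a string that is not a local directory, file, or URL is treated as a model id to resolve on huggingface.co. A hedged sketch of the resulting call path (the model id is illustrative):

# Sketch only, assuming the post-change signature shown in the hunk above:
# get_config_dict(pretrained_model_name_or_path, **kwargs) -> (config_dict, unused_kwargs)
from transformers import BertConfig

config_dict, unused_kwargs = BertConfig.get_config_dict("bert-base-uncased")
config = BertConfig.from_dict(config_dict)  # the same object from_pretrained() would build
print(config.model_type)                    # "bert"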
@@ -152,13 +152,8 @@ class XLMConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "xlm"
def __init__(
...
@@ -39,5 +39,4 @@ class XLMRobertaConfig(RobertaConfig):
superclass for the appropriate documentation alongside usage examples.
"""
- pretrained_config_archive_map = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "xlm-roberta"
@@ -122,13 +122,8 @@ class XLNetConfig(PretrainedConfig):
# Accessing the model configuration
configuration = model.config
- Attributes:
- pretrained_config_archive_map (Dict[str, str]):
- A dictionary containing all the available pre-trained checkpoints.
"""
- pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP
model_type = "xlnet"
def __init__(
...
@@ -32,6 +32,7 @@ from transformers import (
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
+ WEIGHTS_NAME,
XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
@@ -70,6 +71,7 @@ from transformers import (
XLMRobertaConfig,
XLNetConfig,
cached_path,
+ hf_bucket_url,
is_torch_available,
load_pytorch_checkpoint_in_tf2_model,
)
@@ -82,261 +84,103 @@ if is_torch_available():
BertForPreTraining,
BertForQuestionAnswering,
BertForSequenceClassification,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
GPT2LMHeadModel,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
XLNetLMHeadModel,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMWithLMHeadModel,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMRobertaForMaskedLM,
TransfoXLLMHeadModel,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
OpenAIGPTLMHeadModel,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
RobertaForMaskedLM,
RobertaForSequenceClassification,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
CamembertForMaskedLM,
- CamembertForSequenceClassification,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
FlaubertWithLMHeadModel,
DistilBertForMaskedLM,
DistilBertForQuestionAnswering,
- DistilBertForSequenceClassification,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
CTRLLMHeadModel,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
AlbertForPreTraining,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
T5ForConditionalGeneration,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
ElectraForPreTraining,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- )
- else:
- (
- BertForPreTraining,
- BertForQuestionAnswering,
- BertForSequenceClassification,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- GPT2LMHeadModel,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLNetLMHeadModel,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLMWithLMHeadModel,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLMRobertaForMaskedLM,
- TransfoXLLMHeadModel,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
- OpenAIGPTLMHeadModel,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
- RobertaForMaskedLM,
- RobertaForSequenceClassification,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- CamembertForMaskedLM,
- CamembertForSequenceClassification,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- FlaubertWithLMHeadModel,
- DistilBertForMaskedLM,
- DistilBertForSequenceClassification,
- DistilBertForQuestionAnswering,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- CTRLLMHeadModel,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
- AlbertForPreTraining,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- T5ForConditionalGeneration,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
- ElectraForPreTraining,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ) = (
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- None,
- )
)
logging.basicConfig(level=logging.INFO)
MODEL_CLASSES = {
- "bert": (
- BertConfig,
- TFBertForPreTraining,
- BertForPreTraining,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "bert": (BertConfig, TFBertForPreTraining, BertForPreTraining, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
"bert-large-uncased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"bert-large-cased-whole-word-masking-finetuned-squad": (
BertConfig,
TFBertForQuestionAnswering,
BertForQuestionAnswering,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"bert-base-cased-finetuned-mrpc": (
BertConfig,
TFBertForSequenceClassification,
BertForSequenceClassification,
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
- "gpt2": (
- GPT2Config,
- TFGPT2LMHeadModel,
- GPT2LMHeadModel,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
- GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "xlnet": (
- XLNetConfig,
- TFXLNetLMHeadModel,
- XLNetLMHeadModel,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "xlm": (
- XLMConfig,
- TFXLMWithLMHeadModel,
- XLMWithLMHeadModel,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "gpt2": (GPT2Config, TFGPT2LMHeadModel, GPT2LMHeadModel, GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "xlnet": (XLNetConfig, TFXLNetLMHeadModel, XLNetLMHeadModel, XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "xlm": (XLMConfig, TFXLMWithLMHeadModel, XLMWithLMHeadModel, XLM_PRETRAINED_CONFIG_ARCHIVE_MAP,),
"xlm-roberta": (
XLMRobertaConfig,
TFXLMRobertaForMaskedLM,
XLMRobertaForMaskedLM,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"transfo-xl": (
TransfoXLConfig,
TFTransfoXLLMHeadModel,
TransfoXLLMHeadModel,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
TRANSFO_XL_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"openai-gpt": (
OpenAIGPTConfig,
TFOpenAIGPTLMHeadModel,
OpenAIGPTLMHeadModel,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
- "roberta": (
- RobertaConfig,
- TFRobertaForMaskedLM,
- RobertaForMaskedLM,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "roberta": (RobertaConfig, TFRobertaForMaskedLM, RobertaForMaskedLM, ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
"roberta-large-mnli": (
RobertaConfig,
TFRobertaForSequenceClassification,
RobertaForSequenceClassification,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"camembert": (
CamembertConfig,
TFCamembertForMaskedLM,
CamembertForMaskedLM,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
CAMEMBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"flaubert": (
FlaubertConfig,
TFFlaubertWithLMHeadModel,
FlaubertWithLMHeadModel,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
FLAUBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert": (
DistilBertConfig,
TFDistilBertForMaskedLM,
DistilBertForMaskedLM,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
"distilbert-base-distilled-squad": (
DistilBertConfig,
TFDistilBertForQuestionAnswering,
DistilBertForQuestionAnswering,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
),
- "ctrl": (
- CTRLConfig,
- TFCTRLLMHeadModel,
- CTRLLMHeadModel,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
- CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "albert": (
- AlbertConfig,
- TFAlbertForPreTraining,
- AlbertForPreTraining,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "t5": (
- T5Config,
- TFT5ForConditionalGeneration,
- T5ForConditionalGeneration,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
- T5_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
- "electra": (
- ElectraConfig,
- TFElectraForPreTraining,
- ElectraForPreTraining,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,
- ),
+ "ctrl": (CTRLConfig, TFCTRLLMHeadModel, CTRLLMHeadModel, CTRL_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "albert": (AlbertConfig, TFAlbertForPreTraining, AlbertForPreTraining, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "t5": (T5Config, TFT5ForConditionalGeneration, T5ForConditionalGeneration, T5_PRETRAINED_CONFIG_ARCHIVE_MAP,),
+ "electra": (ElectraConfig, TFElectraForPreTraining, ElectraForPreTraining, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP,),
}
@@ -346,7 +190,7 @@ def convert_pt_checkpoint_to_tf(
if model_type not in MODEL_CLASSES:
raise ValueError("Unrecognized model type, should be one of {}.".format(list(MODEL_CLASSES.keys())))
- config_class, model_class, pt_model_class, aws_model_maps, aws_config_map = MODEL_CLASSES[model_type]
+ config_class, model_class, pt_model_class, aws_config_map = MODEL_CLASSES[model_type]
# Initialise TF model
if config_file in aws_config_map:
@@ -358,10 +202,9 @@ def convert_pt_checkpoint_to_tf(
tf_model = model_class(config)
# Load weights from tf checkpoint
- if pytorch_checkpoint_path in aws_model_maps:
- pytorch_checkpoint_path = cached_path(
- aws_model_maps[pytorch_checkpoint_path], force_download=not use_cached_models
- )
+ if pytorch_checkpoint_path in aws_config_map.keys():
+ pytorch_checkpoint_url = hf_bucket_url(pytorch_checkpoint_path, filename=WEIGHTS_NAME)
+ pytorch_checkpoint_path = cached_path(pytorch_checkpoint_url, force_download=not use_cached_models)
# Load PyTorch checkpoint in tf2 model:
tf_model = load_pytorch_checkpoint_in_tf2_model(tf_model, pytorch_checkpoint_path)
...
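The conversion script applies the same pattern to weights: instead of indexing an aws_model_maps dict, it builds the checkpoint URL from the model id. A small sketch of that step, using the names that appear in the hunk above (the model id is illustrative):

# Sketch only, mirroring the new weight-resolution step in the conversion script.
from transformers import WEIGHTS_NAME, cached_path, hf_bucket_url

pytorch_checkpoint_path = "bert-base-cased-finetuned-mrpc"  # example model id
pytorch_checkpoint_url = hf_bucket_url(pytorch_checkpoint_path, filename=WEIGHTS_NAME)
pytorch_checkpoint_path = cached_path(pytorch_checkpoint_url, force_download=False)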
@@ -31,16 +31,17 @@ from .modeling_utils import PreTrainedModel
logger = logging.getLogger(__name__)
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
- "albert-base-v1": "https://cdn.huggingface.co/albert-base-v1-pytorch_model.bin",
- "albert-large-v1": "https://cdn.huggingface.co/albert-large-v1-pytorch_model.bin",
- "albert-xlarge-v1": "https://cdn.huggingface.co/albert-xlarge-v1-pytorch_model.bin",
- "albert-xxlarge-v1": "https://cdn.huggingface.co/albert-xxlarge-v1-pytorch_model.bin",
- "albert-base-v2": "https://cdn.huggingface.co/albert-base-v2-pytorch_model.bin",
- "albert-large-v2": "https://cdn.huggingface.co/albert-large-v2-pytorch_model.bin",
- "albert-xlarge-v2": "https://cdn.huggingface.co/albert-xlarge-v2-pytorch_model.bin",
- "albert-xxlarge-v2": "https://cdn.huggingface.co/albert-xxlarge-v2-pytorch_model.bin",
- }
+ ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST = [
+ "albert-base-v1",
+ "albert-large-v1",
+ "albert-xlarge-v1",
+ "albert-xxlarge-v1",
+ "albert-base-v2",
+ "albert-large-v2",
+ "albert-xlarge-v2",
+ "albert-xxlarge-v2",
+ # See all ALBERT models at https://huggingface.co/models?filter=albert
+ ]
def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
@@ -365,7 +366,6 @@ class AlbertPreTrainedModel(PreTrainedModel):
"""
config_class = AlbertConfig
- pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
base_model_prefix = "albert"
def _init_weights(self, module):
@@ -439,7 +439,6 @@ ALBERT_INPUTS_DOCSTRING = r"""
class AlbertModel(AlbertPreTrainedModel):
config_class = AlbertConfig
- pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
load_tf_weights = load_tf_weights_in_albert
base_model_prefix = "albert"
...
@@ -43,7 +43,6 @@ from .configuration_auto import (
from .configuration_marian import MarianConfig
from .configuration_utils import PretrainedConfig
from .modeling_albert import (
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
AlbertForMaskedLM,
AlbertForPreTraining,
AlbertForQuestionAnswering,
@@ -51,14 +50,8 @@ from .modeling_albert import (
AlbertForTokenClassification,
AlbertModel,
)
- from .modeling_bart import (
- BART_PRETRAINED_MODEL_ARCHIVE_MAP,
- BartForConditionalGeneration,
- BartForSequenceClassification,
- BartModel,
- )
+ from .modeling_bart import BartForConditionalGeneration, BartForSequenceClassification, BartModel
from .modeling_bert import (
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
BertForMaskedLM,
BertForMultipleChoice,
BertForPreTraining,
@@ -68,16 +61,14 @@ from .modeling_bert import (
BertModel,
)
from .modeling_camembert import (
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
CamembertForMaskedLM,
CamembertForMultipleChoice,
CamembertForSequenceClassification,
CamembertForTokenClassification,
CamembertModel,
)
- from .modeling_ctrl import CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel, CTRLModel
+ from .modeling_ctrl import CTRLLMHeadModel, CTRLModel
from .modeling_distilbert import (
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
DistilBertForMaskedLM,
DistilBertForQuestionAnswering,
DistilBertForSequenceClassification,
@@ -85,7 +76,6 @@ from .modeling_distilbert import (
DistilBertModel,
)
from .modeling_electra import (
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
ElectraForMaskedLM,
ElectraForPreTraining,
ElectraForSequenceClassification,
@@ -94,15 +84,13 @@ from .modeling_electra import (
)
from .modeling_encoder_decoder import EncoderDecoderModel
from .modeling_flaubert import (
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
FlaubertForQuestionAnsweringSimple,
FlaubertForSequenceClassification,
FlaubertModel,
FlaubertWithLMHeadModel,
)
- from .modeling_gpt2 import GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, GPT2LMHeadModel, GPT2Model
+ from .modeling_gpt2 import GPT2LMHeadModel, GPT2Model
from .modeling_longformer import (
- LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP,
LongformerForMaskedLM,
LongformerForMultipleChoice,
LongformerForQuestionAnswering,
@@ -111,10 +99,9 @@ from .modeling_longformer import (
LongformerModel,
)
from .modeling_marian import MarianMTModel
- from .modeling_openai import OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, OpenAIGPTLMHeadModel, OpenAIGPTModel
+ from .modeling_openai import OpenAIGPTLMHeadModel, OpenAIGPTModel
from .modeling_reformer import ReformerModel, ReformerModelWithLMHead
from .modeling_roberta import (
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
RobertaForMaskedLM,
RobertaForMultipleChoice,
RobertaForQuestionAnswering,
@@ -122,10 +109,9 @@ from .modeling_roberta import (
RobertaForTokenClassification,
RobertaModel,
)
- from .modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP, T5ForConditionalGeneration, T5Model
+ from .modeling_t5 import T5ForConditionalGeneration, T5Model
- from .modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP, TransfoXLLMHeadModel, TransfoXLModel
+ from .modeling_transfo_xl import TransfoXLLMHeadModel, TransfoXLModel
from .modeling_xlm import (
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMForQuestionAnsweringSimple,
XLMForSequenceClassification,
XLMForTokenClassification,
@@ -133,7 +119,6 @@ from .modeling_xlm import (
XLMWithLMHeadModel,
)
from .modeling_xlm_roberta import (
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
XLMRobertaForMaskedLM,
XLMRobertaForMultipleChoice,
XLMRobertaForSequenceClassification,
@@ -141,7 +126,6 @@ from .modeling_xlm_roberta import (
XLMRobertaModel,
)
from .modeling_xlnet import (
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
XLNetForMultipleChoice,
XLNetForQuestionAnsweringSimple,
XLNetForSequenceClassification,
@@ -154,30 +138,6 @@ from .modeling_xlnet import (
logger = logging.getLogger(__name__)
- ALL_PRETRAINED_MODEL_ARCHIVE_MAP = dict(
- (key, value)
- for pretrained_map in [
- BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- BART_PRETRAINED_MODEL_ARCHIVE_MAP,
- OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
- TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
- GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
- CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
- ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- T5_PRETRAINED_MODEL_ARCHIVE_MAP,
- FLAUBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
- XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
- ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP,
- LONGFORMER_PRETRAINED_MODEL_ARCHIVE_MAP,
- ]
- for key, value, in pretrained_map.items()
- )
MODEL_MAPPING = OrderedDict(
[
(T5Config, T5Model),
@@ -372,29 +332,26 @@ class AutoModel:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The base model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `t5`: :class:`~transformers.T5Model` (T5 model)
- - contains `distilbert`: :class:`~transformers.DistilBertModel` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertModel` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertModel` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaModel` (XLM-RoBERTa model)
- - contains `longformer` :class:`~transformers.LongformerModel` (Longformer model)
- - contains `roberta`: :class:`~transformers.RobertaModel` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertModel` (Bert model)
- - contains `openai-gpt`: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model)
- - contains `gpt2`: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model)
- - contains `transfo-xl`: :class:`~transformers.TransfoXLModel` (Transformer-XL model)
- - contains `xlnet`: :class:`~transformers.XLNetModel` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMModel` (XLM model)
- - contains `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model)
- - contains `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model)
- - contains `electra`: :class:`~transformers.ElectraModel` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `t5`: :class:`~transformers.T5Model` (T5 model)
+ - `distilbert`: :class:`~transformers.DistilBertModel` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertModel` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertModel` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaModel` (XLM-RoBERTa model)
+ - `longformer` :class:`~transformers.LongformerModel` (Longformer model)
+ - `roberta`: :class:`~transformers.RobertaModel` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertModel` (Bert model)
+ - `openai-gpt`: :class:`~transformers.OpenAIGPTModel` (OpenAI GPT model)
+ - `gpt2`: :class:`~transformers.GPT2Model` (OpenAI GPT-2 model)
+ - `transfo-xl`: :class:`~transformers.TransfoXLModel` (Transformer-XL model)
+ - `xlnet`: :class:`~transformers.XLNetModel` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMModel` (XLM model)
+ - `ctrl`: :class:`~transformers.CTRLModel` (Salesforce CTRL model)
+ - `flaubert`: :class:`~transformers.FlaubertModel` (Flaubert model)
+ - `electra`: :class:`~transformers.ElectraModel` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
Args:
pretrained_model_name_or_path: either:
@@ -528,26 +485,23 @@ class AutoModelForPreTraining:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
- - contains `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
- - contains `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
- - contains `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertForPreTraining` (Bert model)
- - contains `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
- - contains `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
- - contains `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
- - contains `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
- - contains `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
- - contains `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
- - contains `electra`: :class:`~transformers.ElectraForPreTraining` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
+ - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
+ - `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
+ - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertForPreTraining` (Bert model)
+ - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
+ - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
+ - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
+ - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
+ - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
+ - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
+ - `electra`: :class:`~transformers.ElectraForPreTraining` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -679,26 +633,23 @@ class AutoModelWithLMHead:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
- - contains `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
- - contains `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
- - contains `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertForMaskedLM` (Bert model)
- - contains `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
- - contains `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
- - contains `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
- - contains `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
- - contains `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
- - contains `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
- - contains `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `t5`: :class:`~transformers.T5ModelWithLMHead` (T5 model)
+ - `distilbert`: :class:`~transformers.DistilBertForMaskedLM` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForMaskedLM` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertForMaskedLM` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForMaskedLM` (XLM-RoBERTa model)
+ - `longformer`: :class:`~transformers.LongformerForMaskedLM` (Longformer model)
+ - `roberta`: :class:`~transformers.RobertaForMaskedLM` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertForMaskedLM` (Bert model)
+ - `openai-gpt`: :class:`~transformers.OpenAIGPTLMHeadModel` (OpenAI GPT model)
+ - `gpt2`: :class:`~transformers.GPT2LMHeadModel` (OpenAI GPT-2 model)
+ - `transfo-xl`: :class:`~transformers.TransfoXLLMHeadModel` (Transformer-XL model)
+ - `xlnet`: :class:`~transformers.XLNetLMHeadModel` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMWithLMHeadModel` (XLM model)
+ - `ctrl`: :class:`~transformers.CTRLLMHeadModel` (Salesforce CTRL model)
+ - `flaubert`: :class:`~transformers.FlaubertWithLMHeadModel` (Flaubert model)
+ - `electra`: :class:`~transformers.ElectraForMaskedLM` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -830,18 +781,15 @@ class AutoModelForSequenceClassification:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `distilbert`: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model)
- - contains `camembert`: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model)
- - contains `roberta`: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model)
- - contains `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model)
- - contains `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
- - contains `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `distilbert`: :class:`~transformers.DistilBertForSequenceClassification` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForSequenceClassification` (ALBERT model)
+ - `camembert`: :class:`~transformers.CamembertForSequenceClassification` (CamemBERT model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForSequenceClassification` (XLM-RoBERTa model)
+ - `roberta`: :class:`~transformers.RobertaForSequenceClassification` (RoBERTa model)
+ - `bert`: :class:`~transformers.BertForSequenceClassification` (Bert model)
+ - `xlnet`: :class:`~transformers.XLNetForSequenceClassification` (XLNet model)
+ - `flaubert`: :class:`~transformers.FlaubertForSequenceClassification` (Flaubert model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -979,16 +927,13 @@ class AutoModelForQuestionAnswering:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `distilbert`: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model)
- - contains `albert`: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model)
- - contains `bert`: :class:`~transformers.BertForQuestionAnswering` (Bert model)
- - contains `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
- - contains `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
- - contains `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (XLM model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `distilbert`: :class:`~transformers.DistilBertForQuestionAnswering` (DistilBERT model)
+ - `albert`: :class:`~transformers.AlbertForQuestionAnswering` (ALBERT model)
+ - `bert`: :class:`~transformers.BertForQuestionAnswering` (Bert model)
+ - `xlnet`: :class:`~transformers.XLNetForQuestionAnswering` (XLNet model)
+ - `xlm`: :class:`~transformers.XLMForQuestionAnswering` (XLM model)
+ - `flaubert`: :class:`~transformers.FlaubertForQuestionAnswering` (XLM model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
@@ -1127,18 +1072,15 @@ class AutoModelForTokenClassification:
The `from_pretrained()` method takes care of returning the correct model class instance
based on the `model_type` property of the config object, or when it's missing,
- falling back to using pattern matching on the `pretrained_model_name_or_path` string.
- The model class to instantiate is selected as the first pattern matching
- in the `pretrained_model_name_or_path` string (in the following order):
- - contains `distilbert`: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model)
- - contains `xlm`: :class:`~transformers.XLMForTokenClassification` (XLM model)
- - contains `xlm-roberta`: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa model)
- - contains `camembert`: :class:`~transformers.CamembertForTokenClassification` (Camembert model)
- - contains `bert`: :class:`~transformers.BertForTokenClassification` (Bert model)
- - contains `xlnet`: :class:`~transformers.XLNetForTokenClassification` (XLNet model)
- - contains `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
- - contains `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model)
+ falling back to using pattern matching on the `pretrained_model_name_or_path` string:
+ - `distilbert`: :class:`~transformers.DistilBertForTokenClassification` (DistilBERT model)
+ - `xlm`: :class:`~transformers.XLMForTokenClassification` (XLM model)
+ - `xlm-roberta`: :class:`~transformers.XLMRobertaForTokenClassification` (XLM-RoBERTa model)
+ - `camembert`: :class:`~transformers.CamembertForTokenClassification` (Camembert model)
+ - `bert`: :class:`~transformers.BertForTokenClassification` (Bert model)
+ - `xlnet`: :class:`~transformers.XLNetForTokenClassification` (XLNet model)
+ - `roberta`: :class:`~transformers.RobertaForTokenClassification` (Roberta model)
+ - `electra`: :class:`~transformers.ElectraForTokenClassification` (Electra model)
The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with `model.train()`
...
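For completeness, the dispatch these docstrings describe can be exercised directly: AutoModel picks the concrete class from the config's model_type, or from the patterns listed above when the config does not carry one. A brief usage sketch (the model id is illustrative):

# Sketch only: the AutoModel dispatch described in the docstrings above.
from transformers import AutoConfig, AutoModel

model = AutoModel.from_pretrained("distilbert-base-uncased")   # resolves to DistilBertModel
config = AutoConfig.from_pretrained("distilbert-base-uncased")
model = AutoModel.from_config(config)                          # same dispatch, freshly initialized weights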