Commit 83bc5235 authored by thomwolf

Merge branch 'master' into pr/2189

parents f349826a f0616062
@@ -42,7 +42,7 @@ class XLMConfig(PretrainedConfig):
     """Configuration class to store the configuration of a `XLMModel`.
     Args:
-        vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `XLMModel`.
+        vocab_size: Vocabulary size of `inputs_ids` in `XLMModel`.
         d_model: Size of the encoder layers and the pooler layer.
         n_layer: Number of hidden layers in the Transformer encoder.
         n_head: Number of attention heads for each attention layer in
@@ -81,7 +81,7 @@ class XLMConfig(PretrainedConfig):
     pretrained_config_archive_map = XLM_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=30145,
+                 vocab_size=30145,
                  emb_dim=2048,
                  n_layers=12,
                  n_heads=16,
@@ -103,9 +103,6 @@ class XLMConfig(PretrainedConfig):
                  unk_index=3,
                  mask_index=5,
                  is_encoder=True,
-                 finetuning_task=None,
-                 num_labels=2,
                  summary_type='first',
                  summary_use_proj=True,
                  summary_activation=None,
@@ -117,56 +114,43 @@ class XLMConfig(PretrainedConfig):
         """Constructs XLMConfig.
         """
         super(XLMConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                self.__dict__[key] = value
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_words = vocab_size_or_config_json_file
-            self.emb_dim = emb_dim
-            self.n_layers = n_layers
-            self.n_heads = n_heads
-            self.dropout = dropout
-            self.attention_dropout = attention_dropout
-            self.gelu_activation = gelu_activation
-            self.sinusoidal_embeddings = sinusoidal_embeddings
-            self.causal = causal
-            self.asm = asm
-            self.n_langs = n_langs
-            self.use_lang_emb = use_lang_emb
-            self.layer_norm_eps = layer_norm_eps
-            self.bos_index = bos_index
-            self.eos_index = eos_index
-            self.pad_index = pad_index
-            self.unk_index = unk_index
-            self.mask_index = mask_index
-            self.is_encoder = is_encoder
-            self.max_position_embeddings = max_position_embeddings
-            self.embed_init_std = embed_init_std
-            self.init_std = init_std
-            self.finetuning_task = finetuning_task
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_proj_to_labels = summary_proj_to_labels
-            self.summary_first_dropout = summary_first_dropout
-            self.start_n_top = start_n_top
-            self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.emb_dim = emb_dim
+        self.n_layers = n_layers
+        self.n_heads = n_heads
+        self.dropout = dropout
+        self.attention_dropout = attention_dropout
+        self.gelu_activation = gelu_activation
+        self.sinusoidal_embeddings = sinusoidal_embeddings
+        self.causal = causal
+        self.asm = asm
+        self.n_langs = n_langs
+        self.use_lang_emb = use_lang_emb
+        self.layer_norm_eps = layer_norm_eps
+        self.bos_index = bos_index
+        self.eos_index = eos_index
+        self.pad_index = pad_index
+        self.unk_index = unk_index
+        self.mask_index = mask_index
+        self.is_encoder = is_encoder
+        self.max_position_embeddings = max_position_embeddings
+        self.embed_init_std = embed_init_std
+        self.init_std = init_std
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_proj_to_labels = summary_proj_to_labels
+        self.summary_first_dropout = summary_first_dropout
+        self.start_n_top = start_n_top
+        self.end_n_top = end_n_top

     @property
-    def vocab_size(self):
-        return self.n_words
+    def n_words(self):  # For backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_words = value
+    @n_words.setter
+    def n_words(self, value):  # For backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
...
@@ -35,7 +35,7 @@ class XLNetConfig(PretrainedConfig):
     """Configuration class to store the configuration of a ``XLNetModel``.
     Args:
-        vocab_size_or_config_json_file: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
+        vocab_size: Vocabulary size of ``inputs_ids`` in ``XLNetModel``.
         d_model: Size of the encoder layers and the pooler layer.
         n_layer: Number of hidden layers in the Transformer encoder.
         n_head: Number of attention heads for each attention layer in
@@ -72,28 +72,22 @@ class XLNetConfig(PretrainedConfig):
     pretrained_config_archive_map = XLNET_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
-                 vocab_size_or_config_json_file=32000,
+                 vocab_size=32000,
                  d_model=1024,
                  n_layer=24,
                  n_head=16,
                  d_inner=4096,
-                 max_position_embeddings=512,
                  ff_activation="gelu",
                  untie_r=True,
                  attn_type="bi",
                  initializer_range=0.02,
                  layer_norm_eps=1e-12,
                  dropout=0.1,
                  mem_len=None,
                  reuse_len=None,
                  bi_data=False,
                  clamp_len=-1,
                  same_length=False,
-                 finetuning_task=None,
-                 num_labels=2,
                  summary_type='last',
                  summary_use_proj=True,
                  summary_activation='tanh',
@@ -104,58 +98,45 @@ class XLNetConfig(PretrainedConfig):
         """Constructs XLNetConfig.
         """
         super(XLNetConfig, self).__init__(**kwargs)
-        if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2
-                        and isinstance(vocab_size_or_config_json_file, unicode)):
-            with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader:
-                json_config = json.loads(reader.read())
-            for key, value in json_config.items():
-                setattr(config, key, value)
-        elif isinstance(vocab_size_or_config_json_file, int):
-            self.n_token = vocab_size_or_config_json_file
-            self.d_model = d_model
-            self.n_layer = n_layer
-            self.n_head = n_head
-            assert d_model % n_head == 0
-            self.d_head = d_model // n_head
-            self.ff_activation = ff_activation
-            self.d_inner = d_inner
-            self.untie_r = untie_r
-            self.attn_type = attn_type
-            self.initializer_range = initializer_range
-            self.layer_norm_eps = layer_norm_eps
-            self.dropout = dropout
-            self.mem_len = mem_len
-            self.reuse_len = reuse_len
-            self.bi_data = bi_data
-            self.clamp_len = clamp_len
-            self.same_length = same_length
-            self.finetuning_task = finetuning_task
-            self.num_labels = num_labels
-            self.summary_type = summary_type
-            self.summary_use_proj = summary_use_proj
-            self.summary_activation = summary_activation
-            self.summary_last_dropout = summary_last_dropout
-            self.start_n_top = start_n_top
-            self.end_n_top = end_n_top
-        else:
-            raise ValueError("First argument must be either a vocabulary size (int)"
-                             " or the path to a pretrained model config file (str)")
+        self.vocab_size = vocab_size
+        self.d_model = d_model
+        self.n_layer = n_layer
+        self.n_head = n_head
+        assert d_model % n_head == 0
+        self.d_head = d_model // n_head
+        self.ff_activation = ff_activation
+        self.d_inner = d_inner
+        self.untie_r = untie_r
+        self.attn_type = attn_type
+        self.initializer_range = initializer_range
+        self.layer_norm_eps = layer_norm_eps
+        self.dropout = dropout
+        self.mem_len = mem_len
+        self.reuse_len = reuse_len
+        self.bi_data = bi_data
+        self.clamp_len = clamp_len
+        self.same_length = same_length
+        self.summary_type = summary_type
+        self.summary_use_proj = summary_use_proj
+        self.summary_activation = summary_activation
+        self.summary_last_dropout = summary_last_dropout
+        self.start_n_top = start_n_top
+        self.end_n_top = end_n_top

     @property
     def max_position_embeddings(self):
         return -1

     @property
-    def vocab_size(self):
-        return self.n_token
+    def n_token(self):  # Backward compatibility
+        return self.vocab_size

-    @vocab_size.setter
-    def vocab_size(self, value):
-        self.n_token = value
+    @n_token.setter
+    def n_token(self, value):  # Backward compatibility
+        self.vocab_size = value

     @property
     def hidden_size(self):
...
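Both configuration classes keep the old attribute names working through properties: vocab_size becomes the stored attribute, while n_words (XLM) and n_token (XLNet) turn into read/write aliases. A minimal standalone sketch of that pattern, purely illustrative and not the actual transformers classes:

class ConfigSketch(object):
    """Illustration of the backward-compatibility aliasing used above."""

    def __init__(self, vocab_size=32000):
        self.vocab_size = vocab_size   # new canonical attribute

    @property
    def n_token(self):                 # old name kept as an alias
        return self.vocab_size

    @n_token.setter
    def n_token(self, value):
        self.vocab_size = value


cfg = ConfigSketch(vocab_size=32000)
assert cfg.n_token == 32000            # reads go through the alias
cfg.n_token = 40000                     # writes update vocab_size
assert cfg.vocab_size == 40000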
@@ -46,7 +46,7 @@ def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_
     roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
     roberta.eval()  # disable dropout
     config = BertConfig(
-        vocab_size_or_config_json_file=50265,
+        vocab_size=50265,
         hidden_size=roberta.args.encoder_embed_dim,
         num_hidden_layers=roberta.args.encoder_layers,
         num_attention_heads=roberta.args.encoder_attention_heads,
...
@@ -72,7 +72,7 @@ WEIGHTS_NAME = "pytorch_model.bin"
 TF2_WEIGHTS_NAME = 'tf_model.h5'
 TF_WEIGHTS_NAME = 'model.ckpt'
 CONFIG_NAME = "config.json"
+MODEL_CARD_NAME = "model_card.json"

 DUMMY_INPUTS = [[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]]
 DUMMY_MASK = [[1, 1, 1, 1, 1], [1, 1, 1, 0, 0], [0, 0, 0, 1, 1]]
...
# coding=utf-8
# Copyright 2018 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Configuration base class and utilities."""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import copy
import json
import logging
import os
from io import open
from .configuration_auto import ALL_PRETRAINED_CONFIG_ARCHIVE_MAP
from .file_utils import CONFIG_NAME, MODEL_CARD_NAME, cached_path, is_remote_url, hf_bucket_url
logger = logging.getLogger(__name__)
class ModelCard(object):
r""" Model Card class.
Stores a model card as well as methods for loading/downloading/saving model cards.
Please read the following paper for details and an explanation of the sections:
"Model Cards for Model Reporting"
by Margaret Mitchell, Simone Wu, Andrew Zaldivar, Parker Barnes, Lucy Vasserman,
Ben Hutchinson, Elena Spitzer, Inioluwa Deborah Raji and Timnit Gebru,
which proposed the model card format.
Link: https://arxiv.org/abs/1810.03993
Note:
A model card can be loaded and saved to disk.
Parameters:
"""
def __init__(self, **kwargs):
# Recommended attributes from https://arxiv.org/abs/1810.03993 (see paper)
self.model_details = kwargs.pop('model_details', {})
self.intended_use = kwargs.pop('intended_use', {})
self.factors = kwargs.pop('factors', {})
self.metrics = kwargs.pop('metrics', {})
self.evaluation_data = kwargs.pop('evaluation_data', {})
self.training_data = kwargs.pop('training_data', {})
self.quantitative_analyses = kwargs.pop('quantitative_analyses', {})
self.ethical_considerations = kwargs.pop('ethical_considerations', {})
self.caveats_and_recommendations = kwargs.pop('caveats_and_recommendations', {})
# Open additional attributes
for key, value in kwargs.items():
try:
setattr(self, key, value)
except AttributeError as err:
logger.error("Can't set {} with value {} for {}".format(key, value, self))
raise err
def save_pretrained(self, save_directory_or_file):
""" Save a model card object to the directory or file `save_directory_or_file`.
"""
if os.path.isdir(save_directory_or_file):
# If we save using the predefined names, we can load using `from_pretrained`
output_model_card_file = os.path.join(save_directory_or_file, MODEL_CARD_NAME)
else:
output_model_card_file = save_directory_or_file
self.to_json_file(output_model_card_file)
logger.info("Model card saved in {}".format(output_model_card_file))
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
r""" Instantiate a :class:`~transformers.ModelCard` from a pre-trained model model card.
Parameters:
pretrained_model_name_or_path: either:
- a string with the `shortcut name` of a pre-trained model card to load from cache or download, e.g.: ``bert-base-uncased``.
- a string with the `identifier name` of a pre-trained model card that was user-uploaded to our S3, e.g.: ``dbmdz/bert-base-german-cased``.
- a path to a `directory` containing a model card file saved using the :func:`~transformers.ModelCard.save_pretrained` method, e.g.: ``./my_model_directory/``.
- a path or url to a saved model card JSON `file`, e.g.: ``./my_model_directory/model_card.json``.
cache_dir: (`optional`) string:
Path to a directory in which a downloaded pre-trained model
card should be cached if the standard cache should not be used.
kwargs: (`optional`) dict: key/value pairs with which to update the ModelCard object after loading.
- The values in kwargs of any keys which are model card attributes will be used to override the loaded values.
- Behavior concerning key/value pairs whose keys are *not* model card attributes is controlled by the `return_unused_kwargs` keyword parameter.
force_download: (`optional`) boolean, default False:
Force to (re-)download the model card file and override the cached version if it exists.
resume_download: (`optional`) boolean, default False:
Do not delete an incompletely received file. Attempt to resume the download if such a file exists.
proxies: (`optional`) dict, default None:
A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
The proxies are used on each request.
return_unused_kwargs: (`optional`) bool:
- If False, then this function returns just the final model card object.
- If True, then this function returns a tuple `(model card, unused_kwargs)` where `unused_kwargs` is a dictionary consisting of the key/value pairs whose keys are not model card attributes: i.e. the part of kwargs which has not been used to update `ModelCard` and is otherwise ignored.
Examples::
model_card = ModelCard.from_pretrained('bert-base-uncased') # Download model card from S3 and cache.
model_card = ModelCard.from_pretrained('./test/saved_model/') # E.g. model card was saved using `save_pretrained('./test/saved_model/')`
model_card = ModelCard.from_pretrained('./test/saved_model/model_card.json')
model_card = ModelCard.from_pretrained('bert-base-uncased', output_attention=True, foo=False)
"""
cache_dir = kwargs.pop('cache_dir', None)
force_download = kwargs.pop('force_download', False)
resume_download = kwargs.pop('resume_download', False)
proxies = kwargs.pop('proxies', None)
return_unused_kwargs = kwargs.pop('return_unused_kwargs', False)
if pretrained_model_name_or_path in ALL_PRETRAINED_CONFIG_ARCHIVE_MAP:
# For simplicity we use the same pretrained URL as the configuration files, but with a different suffix (model_card.json)
model_card_file = ALL_PRETRAINED_CONFIG_ARCHIVE_MAP[pretrained_model_name_or_path]
model_card_file = model_card_file.replace(CONFIG_NAME, MODEL_CARD_NAME)
elif os.path.isdir(pretrained_model_name_or_path):
model_card_file = os.path.join(pretrained_model_name_or_path, MODEL_CARD_NAME)
elif os.path.isfile(pretrained_model_name_or_path) or is_remote_url(pretrained_model_name_or_path):
model_card_file = pretrained_model_name_or_path
else:
model_card_file = hf_bucket_url(pretrained_model_name_or_path, postfix=MODEL_CARD_NAME)
try:
# Load from URL or cache if already cached
resolved_model_card_file = cached_path(model_card_file, cache_dir=cache_dir, force_download=force_download,
proxies=proxies, resume_download=resume_download)
if resolved_model_card_file == model_card_file:
logger.info("loading model card file {}".format(model_card_file))
else:
logger.info("loading model card file {} from cache at {}".format(
model_card_file, resolved_model_card_file))
# Load model card
model_card = cls.from_json_file(resolved_model_card_file)
except EnvironmentError:
if pretrained_model_name_or_path in ALL_PRETRAINED_CONFIG_ARCHIVE_MAP:
logger.warning("Couldn't reach server at '{}' to download model card file.".format(
model_card_file))
else:
logger.warning("Model name '{}' was not found in model name list ({}). " \
"We assumed '{}' was a path or url to a model card file named {} or " \
"a directory containing such a file but couldn't find any such file at this path or url.".format(
pretrained_model_name_or_path,
', '.join(ALL_PRETRAINED_CONFIG_ARCHIVE_MAP.keys()),
model_card_file, MODEL_CARD_NAME))
logger.warning("Creating an empty model card.")
# We fall back on creating an empty model card
model_card = cls()
except json.JSONDecodeError:
logger.warning("Couldn't reach server at '{}' to download model card file or "
"model card file is not a valid JSON file. "
"Please check network or file content here: {}.".format(model_card_file, resolved_model_card_file))
logger.warning("Creating an empty model card.")
# We fall back on creating an empty model card
model_card = cls()
# Update model card with kwargs if needed
to_remove = []
for key, value in kwargs.items():
if hasattr(model_card, key):
setattr(model_card, key, value)
to_remove.append(key)
for key in to_remove:
kwargs.pop(key, None)
logger.info("Model card: %s", str(model_card))
if return_unused_kwargs:
return model_card, kwargs
else:
return model_card
@classmethod
def from_dict(cls, json_object):
"""Constructs a `ModelCard` from a Python dictionary of parameters."""
return cls(**json_object)
@classmethod
def from_json_file(cls, json_file):
"""Constructs a `ModelCard` from a json file of parameters."""
with open(json_file, "r", encoding='utf-8') as reader:
text = reader.read()
dict_obj = json.loads(text)
return cls(**dict_obj)
def __eq__(self, other):
return self.__dict__ == other.__dict__
def __repr__(self):
return str(self.to_json_string())
def to_dict(self):
"""Serializes this instance to a Python dictionary."""
output = copy.deepcopy(self.__dict__)
return output
def to_json_string(self):
"""Serializes this instance to a JSON string."""
return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
def to_json_file(self, json_file_path):
""" Save this instance to a json file."""
with open(json_file_path, "w", encoding='utf-8') as writer:
writer.write(self.to_json_string())
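For reference, a short usage sketch of the ModelCard class added above; the attribute values are placeholders, and the round trip through a temporary directory mirrors what save_pretrained/from_pretrained are expected to do:

import tempfile

from transformers.model_card import ModelCard

# Build a card from a plain dict, add a section, and round-trip it through disk.
card = ModelCard.from_dict({"model_details": {"Organization": "example-org"}})
card.intended_use = {"Primary intended uses": "demonstration only"}

with tempfile.TemporaryDirectory() as tmpdir:
    card.save_pretrained(tmpdir)                  # writes <tmpdir>/model_card.json
    reloaded = ModelCard.from_pretrained(tmpdir)

assert reloaded.to_dict() == card.to_dict()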
@@ -18,18 +18,18 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 import logging

-from .modeling_bert import BertModel, BertForMaskedLM, BertForSequenceClassification, BertForQuestionAnswering
+from .modeling_bert import BertModel, BertForMaskedLM, BertForSequenceClassification, BertForQuestionAnswering, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_openai import OpenAIGPTModel, OpenAIGPTLMHeadModel
+from .modeling_openai import OpenAIGPTModel, OpenAIGPTLMHeadModel, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_gpt2 import GPT2Model, GPT2LMHeadModel
+from .modeling_gpt2 import GPT2Model, GPT2LMHeadModel, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_ctrl import CTRLModel, CTRLLMHeadModel
+from .modeling_ctrl import CTRLModel, CTRLLMHeadModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_transfo_xl import TransfoXLModel, TransfoXLLMHeadModel
+from .modeling_transfo_xl import TransfoXLModel, TransfoXLLMHeadModel, TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_xlnet import XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering
+from .modeling_xlnet import XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering, XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_xlm import XLMModel, XLMWithLMHeadModel, XLMForSequenceClassification, XLMForQuestionAnswering
+from .modeling_xlm import XLMModel, XLMWithLMHeadModel, XLMForSequenceClassification, XLMForQuestionAnswering, XLM_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_roberta import RobertaModel, RobertaForMaskedLM, RobertaForSequenceClassification
+from .modeling_roberta import RobertaModel, RobertaForMaskedLM, RobertaForSequenceClassification, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_distilbert import DistilBertModel, DistilBertForQuestionAnswering, DistilBertForMaskedLM, DistilBertForSequenceClassification
+from .modeling_distilbert import DistilBertModel, DistilBertForQuestionAnswering, DistilBertForMaskedLM, DistilBertForSequenceClassification, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_camembert import CamembertModel, CamembertForMaskedLM, CamembertForSequenceClassification, CamembertForMultipleChoice
+from .modeling_camembert import CamembertModel, CamembertForMaskedLM, CamembertForSequenceClassification, CamembertForMultipleChoice, CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_albert import AlbertModel, AlbertForMaskedLM, AlbertForSequenceClassification, AlbertForQuestionAnswering
+from .modeling_albert import AlbertModel, AlbertForMaskedLM, AlbertForSequenceClassification, AlbertForQuestionAnswering, ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_t5 import T5Model, T5WithLMHeadModel
+from .modeling_t5 import T5Model, T5WithLMHeadModel, T5_PRETRAINED_MODEL_ARCHIVE_MAP

 from .modeling_utils import PreTrainedModel, SequenceSummary
@@ -38,6 +38,24 @@ from .file_utils import add_start_docstrings

 logger = logging.getLogger(__name__)

+ALL_PRETRAINED_MODEL_ARCHIVE_MAP = dict((key, value)
+    for pretrained_map in [
+        BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
+        GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
+        CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
+        XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
+        XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
+        ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
+        DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        T5_PRETRAINED_MODEL_ARCHIVE_MAP,
+    ]
+    for key, value, in pretrained_map.items())

 class AutoModel(object):
     r"""
     :class:`~transformers.AutoModel` is a generic model class
...
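The nested generator expression above simply folds the per-model archive maps into one dict keyed by pretrained shortcut name. An equivalent, more explicit sketch with two toy maps standing in for the twelve imported above (the URLs are placeholders):

# Toy stand-ins for two of the per-model maps imported above.
bert_map = {"bert-base-uncased": "https://example.invalid/bert-base-uncased-pytorch_model.bin"}
gpt2_map = {"gpt2": "https://example.invalid/gpt2-pytorch_model.bin"}

all_pretrained = {}
for pretrained_map in [bert_map, gpt2_map]:     # the real code folds in all twelve maps
    all_pretrained.update(pretrained_map)

# A shortcut name can then be resolved with a single lookup.
assert "gpt2" in all_pretrained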
@@ -634,6 +634,7 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
     """
     def __init__(self, config):
         super(GPT2DoubleHeadsModel, self).__init__(config)
+        config.num_labels = 1
         self.transformer = GPT2Model(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         self.multiple_choice_head = SequenceSummary(config)
...
@@ -590,6 +590,7 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
     def __init__(self, config):
         super(OpenAIGPTDoubleHeadsModel, self).__init__(config)
+        config.num_labels = 1
         self.transformer = OpenAIGPTModel(config)
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
         self.multiple_choice_head = SequenceSummary(config)
...
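In both double-heads models, config.num_labels = 1 is set before SequenceSummary is built, so the multiple-choice head projects each choice's summary vector to a single score and the model emits one logit per candidate answer. A rough standalone sketch of that idea in plain PyTorch (hidden size, batch size and choice count are arbitrary; this is not the actual SequenceSummary implementation):

import torch
import torch.nn as nn

batch_size, num_choices, seq_len, hidden_size = 3, 2, 16, 768

# Projection to a single logit per choice, i.e. num_labels = 1.
summary_proj = nn.Linear(hidden_size, 1)

# Hidden states for every choice's token sequence: (batch, choices, seq_len, hidden).
hidden_states = torch.randn(batch_size, num_choices, seq_len, hidden_size)

# Take one summary token per choice (here simply the last token) and score it.
summary = hidden_states[:, :, -1, :]            # (batch, choices, hidden)
mc_logits = summary_proj(summary).squeeze(-1)   # (batch, choices)

print(mc_logits.shape)                          # torch.Size([3, 2])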
@@ -18,22 +18,40 @@ from __future__ import absolute_import, division, print_function, unicode_literals
 import logging

-from .modeling_tf_bert import TFBertModel, TFBertForMaskedLM, TFBertForSequenceClassification, TFBertForQuestionAnswering
+from .modeling_tf_bert import TFBertModel, TFBertForMaskedLM, TFBertForSequenceClassification, TFBertForQuestionAnswering, TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_openai import TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel
+from .modeling_tf_openai import TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_gpt2 import TFGPT2Model, TFGPT2LMHeadModel
+from .modeling_tf_gpt2 import TFGPT2Model, TFGPT2LMHeadModel, TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_transfo_xl import TFTransfoXLModel, TFTransfoXLLMHeadModel
+from .modeling_tf_transfo_xl import TFTransfoXLModel, TFTransfoXLLMHeadModel, TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_xlnet import TFXLNetModel, TFXLNetLMHeadModel, TFXLNetForSequenceClassification, TFXLNetForQuestionAnsweringSimple
+from .modeling_tf_xlnet import TFXLNetModel, TFXLNetLMHeadModel, TFXLNetForSequenceClassification, TFXLNetForQuestionAnsweringSimple, TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_xlm import TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple
+from .modeling_tf_xlm import TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple, TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_roberta import TFRobertaModel, TFRobertaForMaskedLM, TFRobertaForSequenceClassification
+from .modeling_tf_roberta import TFRobertaModel, TFRobertaForMaskedLM, TFRobertaForSequenceClassification, TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_distilbert import TFDistilBertModel, TFDistilBertForQuestionAnswering, TFDistilBertForMaskedLM, TFDistilBertForSequenceClassification
+from .modeling_tf_distilbert import TFDistilBertModel, TFDistilBertForQuestionAnswering, TFDistilBertForMaskedLM, TFDistilBertForSequenceClassification, TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel
+from .modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
+from .modeling_tf_albert import TFAlbertModel, TFAlbertForMaskedLM, TFAlbertForSequenceClassification, TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_tf_t5 import TFT5Model, TFT5WithLMHeadModel
+from .modeling_tf_t5 import TFT5Model, TFT5WithLMHeadModel, TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP

 from .file_utils import add_start_docstrings

 logger = logging.getLogger(__name__)

+TF_ALL_PRETRAINED_MODEL_ARCHIVE_MAP = dict((key, value)
+    for pretrained_map in [
+        TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
+        TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP,
+    ]
+    for key, value, in pretrained_map.items())

 class TFAutoModel(object):
     r"""
     :class:`~transformers.TFAutoModel` is a generic model class
@@ -144,6 +162,8 @@ class TFAutoModel(object):
             return TFT5Model.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'distilbert' in pretrained_model_name_or_path:
             return TFDistilBertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        elif 'albert' in pretrained_model_name_or_path:
+            return TFAlbertModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'roberta' in pretrained_model_name_or_path:
             return TFRobertaModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'bert' in pretrained_model_name_or_path:
@@ -280,6 +300,8 @@ class TFAutoModelWithLMHead(object):
             return TFT5WithLMHeadModel.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'distilbert' in pretrained_model_name_or_path:
             return TFDistilBertForMaskedLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        elif 'albert' in pretrained_model_name_or_path:
+            return TFAlbertForMaskedLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'roberta' in pretrained_model_name_or_path:
             return TFRobertaForMaskedLM.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'bert' in pretrained_model_name_or_path:
@@ -407,6 +429,8 @@ class TFAutoModelForSequenceClassification(object):
         """
         if 'distilbert' in pretrained_model_name_or_path:
             return TFDistilBertForSequenceClassification.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
+        elif 'albert' in pretrained_model_name_or_path:
+            return TFAlbertForSequenceClassification.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'roberta' in pretrained_model_name_or_path:
             return TFRobertaForSequenceClassification.from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
         elif 'bert' in pretrained_model_name_or_path:
...
@@ -574,6 +574,7 @@ class TFGPT2DoubleHeadsModel(TFGPT2PreTrainedModel):
     """
     def __init__(self, config, *inputs, **kwargs):
         super(TFGPT2DoubleHeadsModel, self).__init__(config, *inputs, **kwargs)
+        config.num_labels = 1
         self.transformer = TFGPT2MainLayer(config, name='transformer')
         self.multiple_choice_head = TFSequenceSummary(config, initializer_range=config.initializer_range, name='multiple_choice_head')
...
@@ -538,6 +538,7 @@ class TFOpenAIGPTDoubleHeadsModel(TFOpenAIGPTPreTrainedModel):
     """
     def __init__(self, config, *inputs, **kwargs):
         super(TFOpenAIGPTDoubleHeadsModel, self).__init__(config, *inputs, **kwargs)
+        config.num_labels = 1
         self.transformer = TFOpenAIGPTMainLayer(config, name='transformer')
         self.multiple_choice_head = TFSequenceSummary(config, initializer_range=config.initializer_range, name='multiple_choice_head')
...
@@ -143,7 +143,11 @@ def load_pytorch_weights_in_tf2_model(tf_model, pt_state_dict, tf_inputs=None, allow_missing_keys=False):
         name, transpose = convert_tf_weight_name_to_pt_weight_name(sw_name, start_prefix_to_remove=start_prefix_to_remove)

         # Find associated numpy array in pytorch model state dict
-        assert name in pt_state_dict, "{} not found in PyTorch model".format(name)
+        if name not in pt_state_dict:
+            if allow_missing_keys:
+                continue
+            raise AttributeError("{} not found in PyTorch model".format(name))
+
         array = pt_state_dict[name].numpy()

         if transpose:
@@ -250,6 +254,7 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=False):
     all_tf_weights = set(list(tf_weights_map.keys()))
     loaded_pt_weights_data_ptr = {}
+    missing_keys_pt = []
     for pt_weight_name, pt_weight in current_pt_params_dict.items():
         # Handle PyTorch shared weight ()not duplicated in TF 2.0
         if pt_weight.data_ptr() in loaded_pt_weights_data_ptr:
@@ -258,7 +263,10 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=False):
         # Find associated numpy array in pytorch model state dict
         if pt_weight_name not in tf_weights_map:
-            raise ValueError("{} not found in TF 2.0 model".format(pt_weight_name))
+            if allow_missing_keys:
+                missing_keys_pt.append(pt_weight_name)
+                continue
+            raise AttributeError("{} not found in TF 2.0 model".format(pt_weight_name))

         array, transpose = tf_weights_map[pt_weight_name]
@@ -283,6 +291,7 @@ def load_tf2_weights_in_pytorch_model(pt_model, tf_weights, allow_missing_keys=False):
         all_tf_weights.discard(pt_weight_name)

     missing_keys, unexpected_keys = pt_model.load_state_dict(new_pt_params_dict, strict=False)
+    missing_keys += missing_keys_pt

     if len(missing_keys) > 0:
         logger.info("Weights of {} not initialized from TF 2.0 model: {}".format(
...
@@ -353,7 +353,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

-        self.n_token = config.n_token
+        self.n_token = config.vocab_size
         self.d_embed = config.d_embed
         self.d_model = config.d_model
@@ -361,7 +361,7 @@ class TFTransfoXLMainLayer(tf.keras.layers.Layer):
         self.d_head = config.d_head
         self.untie_r = config.untie_r

-        self.word_emb = TFAdaptiveEmbedding(config.n_token, config.d_embed, config.d_model, config.cutoffs,
+        self.word_emb = TFAdaptiveEmbedding(config.vocab_size, config.d_embed, config.d_model, config.cutoffs,
                                             div_val=config.div_val, init_std=config.init_std, name='word_emb')

         self.drop = tf.keras.layers.Dropout(config.dropout)
@@ -729,7 +729,7 @@ class TFTransfoXLLMHeadModel(TFTransfoXLPreTrainedModel):
             raise NotImplementedError
         # use adaptive softmax (including standard softmax)
         else:
-            self.crit = TFAdaptiveSoftmaxMask(config.n_token, config.d_embed, config.d_model,
+            self.crit = TFAdaptiveSoftmaxMask(config.vocab_size, config.d_embed, config.d_model,
                                               config.cutoffs, div_val=config.div_val, name='crit')

     def reset_length(self, tgt_len, ext_len, mem_len):
...
@@ -25,15 +25,15 @@ import tensorflow as tf
 from .modeling_tf_utils import shape_list

 class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
-    def __init__(self, n_token, d_embed, d_proj, cutoffs, div_val=1,
+    def __init__(self, vocab_size, d_embed, d_proj, cutoffs, div_val=1,
                  keep_order=False, **kwargs):
         super(TFAdaptiveSoftmaxMask, self).__init__(**kwargs)

-        self.n_token = n_token
+        self.vocab_size = vocab_size
         self.d_embed = d_embed
         self.d_proj = d_proj

-        self.cutoffs = cutoffs + [n_token]
+        self.cutoffs = cutoffs + [vocab_size]
         self.cutoff_ends = [0] + self.cutoffs
         self.div_val = div_val
@@ -66,11 +66,11 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
                 self.out_projs.append(weight)
             else:
                 self.out_projs.append(None)
-            weight = self.add_weight(shape=(self.n_token, self.d_embed,),
+            weight = self.add_weight(shape=(self.vocab_size, self.d_embed,),
                                      initializer='zeros',
                                      trainable=True,
                                      name='out_layers_._{}_._weight'.format(i))
-            bias = self.add_weight(shape=(self.n_token,),
+            bias = self.add_weight(shape=(self.vocab_size,),
                                    initializer='zeros',
                                    trainable=True,
                                    name='out_layers_._{}_._bias'.format(i))
@@ -114,7 +114,7 @@ class TFAdaptiveSoftmaxMask(tf.keras.layers.Layer):
         hidden, target = inputs
         head_logprob = 0
         if self.n_clusters == 0:
-            softmax_b = tf.get_variable('bias', [n_token], initializer=tf.zeros_initializer())
+            softmax_b = tf.get_variable('bias', [self.config.vocab_size], initializer=tf.zeros_initializer())
             output = self._logit(hidden, self.out_layers[0][0], self.out_layers[0][1], self.out_projs[0])
             if target is not None:
                 loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output)
...
@@ -297,7 +297,7 @@ class TFPreTrainedModel(tf.keras.Model):
         if from_pt:
             # Load from a PyTorch checkpoint
-            return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file)
+            return load_pytorch_checkpoint_in_tf2_model(model, resolved_archive_file, allow_missing_keys=True)

         ret = model(model.dummy_inputs, training=False)  # build the network with dummy inputs
...
@@ -366,7 +366,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
         self.use_bfloat16 = config.use_bfloat16
         self.initializer_range = config.initializer_range

-        self.word_embedding = TFSharedEmbeddings(config.n_token, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
+        self.word_embedding = TFSharedEmbeddings(config.vocab_size, config.d_model, initializer_range=config.initializer_range, name='word_embedding')
         self.layer = [TFXLNetLayer(config, name='layer_._{}'.format(i)) for i in range(config.n_layer)]
         self.dropout = tf.keras.layers.Dropout(config.dropout)
...
@@ -592,14 +592,14 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
         self.output_attentions = config.output_attentions
         self.output_hidden_states = config.output_hidden_states

-        self.n_token = config.n_token
+        self.n_token = config.vocab_size
         self.d_embed = config.d_embed
         self.d_model = config.d_model
         self.n_head = config.n_head
         self.d_head = config.d_head

-        self.word_emb = AdaptiveEmbedding(config.n_token, config.d_embed, config.d_model, config.cutoffs,
+        self.word_emb = AdaptiveEmbedding(config.vocab_size, config.d_embed, config.d_model, config.cutoffs,
                                           div_val=config.div_val)

         self.drop = nn.Dropout(config.dropout)
@@ -836,11 +836,11 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
         self.sample_softmax = config.sample_softmax
         # use sampled softmax
         if config.sample_softmax > 0:
-            self.out_layer = nn.Linear(config.d_model, config.n_token)
-            self.sampler = LogUniformSampler(config.n_token, config.sample_softmax)
+            self.out_layer = nn.Linear(config.d_model, config.vocab_size)
+            self.sampler = LogUniformSampler(config.vocab_size, config.sample_softmax)
         # use adaptive softmax (including standard softmax)
         else:
-            self.crit = ProjectedAdaptiveLogSoftmax(config.n_token, config.d_embed, config.d_model,
+            self.crit = ProjectedAdaptiveLogSoftmax(config.vocab_size, config.d_embed, config.d_model,
                                                     config.cutoffs, div_val=config.div_val)

         self.init_weights()
...
@@ -609,7 +609,7 @@ class XLNetModel(XLNetPreTrainedModel):
         self.clamp_len = config.clamp_len
         self.n_layer = config.n_layer

-        self.word_embedding = nn.Embedding(config.n_token, config.d_model)
+        self.word_embedding = nn.Embedding(config.vocab_size, config.d_model)
         self.mask_emb = nn.Parameter(torch.FloatTensor(1, 1, config.d_model))
         self.layer = nn.ModuleList([XLNetLayer(config) for _ in range(config.n_layer)])
         self.dropout = nn.Dropout(config.dropout)
@@ -940,7 +940,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
         self.same_length = config.same_length

         self.transformer = XLNetModel(config)
-        self.lm_loss = nn.Linear(config.d_model, config.n_token, bias=True)
+        self.lm_loss = nn.Linear(config.d_model, config.vocab_size, bias=True)

         self.init_weights()
...
@@ -16,15 +16,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import copy
 import os
-import shutil
 import json
-import random
-import uuid
+import tempfile
 import unittest
-import logging
+
+from .tokenization_tests_commons import TemporaryDirectory

 class ConfigTester(object):
@@ -48,16 +45,28 @@ class ConfigTester(object):
     def create_and_test_config_to_json_file(self):
         config_first = self.config_class(**self.inputs_dict)
-        json_file_path = os.path.join(os.getcwd(), "config_" + str(uuid.uuid4()) + ".json")
-        config_first.to_json_file(json_file_path)
-        config_second = self.config_class.from_json_file(json_file_path)
-        os.remove(json_file_path)
+
+        with TemporaryDirectory() as tmpdirname:
+            json_file_path = os.path.join(tmpdirname, "config.json")
+            config_first.to_json_file(json_file_path)
+            config_second = self.config_class.from_json_file(json_file_path)
+
         self.parent.assertEqual(config_second.to_dict(), config_first.to_dict())

+    def create_and_test_config_from_and_save_pretrained(self):
+        config_first = self.config_class(**self.inputs_dict)
+
+        with TemporaryDirectory() as tmpdirname:
+            config_first.save_pretrained(tmpdirname)
+            config_second = self.config_class.from_pretrained(tmpdirname)
+
+        self.parent.assertEqual(config_second.to_dict(), config_first.to_dict())
+
     def run_common_tests(self):
         self.create_and_test_config_common_properties()
         self.create_and_test_config_to_json_string()
         self.create_and_test_config_to_json_file()
+        self.create_and_test_config_from_and_save_pretrained()

 if __name__ == "__main__":
     unittest.main()
\ No newline at end of file
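The new create_and_test_config_from_and_save_pretrained check boils down to a save/load round trip. A hedged example of the same thing with one concrete config class (the temporary-directory handling is illustrative):

import tempfile

from transformers import XLMConfig

config = XLMConfig(vocab_size=30145)      # note the new argument name, not vocab_size_or_config_json_file

with tempfile.TemporaryDirectory() as tmpdir:
    config.save_pretrained(tmpdir)        # writes <tmpdir>/config.json
    reloaded = XLMConfig.from_pretrained(tmpdir)

assert reloaded.to_dict() == config.to_dict()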
# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import json
import unittest
from transformers.model_card import ModelCard
from .tokenization_tests_commons import TemporaryDirectory
class ModelCardTester(unittest.TestCase):
def setUp(self):
self.inputs_dict = {'model_details': {
'Organization': 'testing',
'Model date': 'today',
'Model version': 'v2.1, Developed by Test Corp in 2019.',
'Architecture': 'Convolutional Neural Network.',
},
'metrics': 'BLEU and ROUGE-1',
'evaluation_data':{
'Datasets':{
'BLEU': 'My-great-dataset-v1',
'ROUGE-1': 'My-short-dataset-v2.1',
},
'Preprocessing': 'See details on https://arxiv.org/pdf/1810.03993.pdf'
},
'training_data':{
'Dataset': 'English Wikipedia dump dated 2018-12-01',
'Preprocessing': 'Using SentencePiece vocabulary of size 52k tokens. See details on https://arxiv.org/pdf/1810.03993.pdf'
},
'quantitative_analyses': {
'BLEU': 55.1,
'ROUGE-1': 76,
},
}
def test_model_card_common_properties(self):
model_card = ModelCard.from_dict(self.inputs_dict)
self.assertTrue(hasattr(model_card, 'model_details'))
self.assertTrue(hasattr(model_card, 'intended_use'))
self.assertTrue(hasattr(model_card, 'factors'))
self.assertTrue(hasattr(model_card, 'metrics'))
self.assertTrue(hasattr(model_card, 'evaluation_data'))
self.assertTrue(hasattr(model_card, 'training_data'))
self.assertTrue(hasattr(model_card, 'quantitative_analyses'))
self.assertTrue(hasattr(model_card, 'ethical_considerations'))
self.assertTrue(hasattr(model_card, 'caveats_and_recommendations'))
def test_model_card_to_json_string(self):
model_card = ModelCard.from_dict(self.inputs_dict)
obj = json.loads(model_card.to_json_string())
for key, value in self.inputs_dict.items():
self.assertEqual(obj[key], value)
def test_model_card_to_json_file(self):
model_card_first = ModelCard.from_dict(self.inputs_dict)
with TemporaryDirectory() as tmpdirname:
filename = os.path.join(tmpdirname, u"model_card.json")
model_card_first.to_json_file(filename)
model_card_second = ModelCard.from_json_file(filename)
self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict())
def test_model_card_from_and_save_pretrained(self):
model_card_first = ModelCard.from_dict(self.inputs_dict)
with TemporaryDirectory() as tmpdirname:
model_card_first.save_pretrained(tmpdirname)
model_card_second = ModelCard.from_pretrained(tmpdirname)
self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict())
if __name__ == "__main__":
unittest.main()