Commit 2918b7d2 authored by thomwolf

updating tests

parent 3fbceed8
@@ -617,6 +617,7 @@ class BertModel(BertPreTrainedModel):
         old_embeddings = self.embeddings.word_embeddings
         new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
         self.embeddings.word_embeddings = new_embeddings
+        return self.embeddings.word_embeddings

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.
@@ -758,11 +759,8 @@ class BertForPreTraining(BertPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
        """
-        input_embeddings = self.bert.embeddings.word_embeddings.weight
-        if self.config.torchscript:
-            self.cls.predictions.decoder.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.cls.predictions.decoder.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.cls.predictions.decoder,
+                                   self.bert.embeddings.word_embeddings)

     def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
                 next_sentence_label=None, head_mask=None):
@@ -864,11 +862,8 @@ class BertForMaskedLM(BertPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
        """
-        input_embeddings = self.bert.embeddings.word_embeddings.weight
-        if self.config.torchscript:
-            self.cls.predictions.decoder.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.cls.predictions.decoder.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.cls.predictions.decoder,
+                                   self.bert.embeddings.word_embeddings)

     def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, head_mask=None):
         """
...
@@ -414,6 +414,7 @@ class GPT2Model(GPT2PreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.wte = self._get_resized_embeddings(self.wte, new_num_tokens)
+        return self.wte

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.
@@ -562,11 +563,8 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
        """
-        input_embeddings = self.transformer.wte.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.wte)

     def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, past=None, head_mask=None):
         """
@@ -658,11 +656,8 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
        """
-        input_embeddings = self.transformer.wte.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.wte)

     def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
                 position_ids=None, past=None, head_mask=None):
...
@@ -430,6 +430,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.tokens_embed = self._get_resized_embeddings(self.tokens_embed, new_num_tokens)
+        return self.tokens_embed

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.
@@ -583,11 +584,8 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
        """
-        input_embeddings = self.transformer.tokens_embed.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.tokens_embed)

     def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None, head_mask=None):
         """
@@ -696,11 +694,8 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         """ Make sure we are sharing the input and output embeddings.
            Export to TorchScript can't handle parameter sharing so we are cloning them instead.
        """
-        input_embeddings = self.transformer.tokens_embed.weight
-        if self.config.torchscript:
-            self.lm_head.weight = nn.Parameter(input_embeddings.clone())
-        else:
-            self.lm_head.weight = input_embeddings  # Tied weights
+        self._tie_or_clone_weights(self.lm_head,
+                                   self.transformer.tokens_embed)

     def forward(self, input_ids, mc_token_ids=None, lm_labels=None, mc_labels=None, token_type_ids=None,
                 position_ids=None, head_mask=None):
...
@@ -291,6 +291,10 @@ class TransfoXLConfig(PretrainedConfig):
     def vocab_size(self):
         return self.n_token

+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_token = value
+
     @property
     def hidden_size(self):
         return self.d_model
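The setter is needed because the generic `resize_token_embeddings` in `PreTrainedModel` (changed later in this commit) assigns `self.config.vocab_size = new_num_tokens`; Transformer-XL, XLM and XLNet expose `vocab_size` only as a read-only property over `n_token`/`n_words`, so that assignment would otherwise raise `AttributeError`. A minimal sketch of the pattern with a hypothetical `ToyConfig` class (not part of the library):

# Hypothetical ToyConfig, only to show why the setter matters: the generic
# resize code assigns to `config.vocab_size`, and without a setter that
# assignment fails on configs that store the value under another name.
class ToyConfig(object):
    def __init__(self, n_token):
        self.n_token = n_token

    @property
    def vocab_size(self):
        return self.n_token

    @vocab_size.setter
    def vocab_size(self, value):
        self.n_token = value

config = ToyConfig(n_token=100)
config.vocab_size = 110          # what resize_token_embeddings effectively does
assert config.n_token == 110     # the underlying attribute is kept in sync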
@@ -1003,7 +1007,7 @@ class TransfoXLModel(TransfoXLPreTrainedModel):
         self.apply(self.init_weights)

     def _resize_token_embeddings(self, new_num_tokens):
-        raise NotImplementedError
+        return self.word_emb

     def backward_compatible(self):
         self.sample_softmax = -1
@@ -1280,13 +1284,20 @@ class TransfoXLLMHeadModel(TransfoXLPreTrainedModel):
         else:
             if self.config.tie_weight:
                 for i in range(len(self.crit.out_layers)):
-                    self.crit.out_layers[i].weight = self.transformer.word_emb.emb_layers[i].weight
+                    self._tie_or_clone_weights(self.crit.out_layers[i],
+                                               self.transformer.word_emb.emb_layers[i])
         if self.config.tie_projs:
             for i, tie_proj in enumerate(self.config.tie_projs):
                 if tie_proj and self.config.div_val == 1 and self.config.d_model != self.config.d_embed:
-                    self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
+                    if self.config.torchscript:
+                        self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[0].clone())
+                    else:
+                        self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[0]
                 elif tie_proj and self.config.div_val != 1:
-                    self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]
+                    if self.config.torchscript:
+                        self.crit.out_projs[i] = nn.Parameter(self.transformer.word_emb.emb_projs[i].clone())
+                    else:
+                        self.crit.out_projs[i] = self.transformer.word_emb.emb_projs[i]

     def reset_length(self, tgt_len, ext_len, mem_len):
         self.transformer.reset_length(tgt_len, ext_len, mem_len)
...
@@ -165,9 +165,27 @@ class PreTrainedModel(nn.Module):
         # Save config in model
         self.config = config

-    def _get_resized_embeddings(self, old_embeddings, new_num_tokens):
-        # Build new embeddings
+    def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
+        """ Build a resized Embedding Module from a provided token Embedding Module.
+            Increasing the size will add newly initialized vectors at the end
+            Reducing the size will remove vectors from the end
+
+            Args:
+                new_num_tokens: (Optional) New number of tokens in the embedding matrix.
+                    Increasing the size will add newly initialized vectors at the end
+                    Reducing the size will remove vectors from the end
+                    If not provided or None: return the provided token Embedding Module.
+            Return:
+                Pointer to the resized Embedding Module or the old Embedding Module if new_num_tokens is None
+        """
+        if new_num_tokens is None:
+            return old_embeddings
+
         old_num_tokens, old_embedding_dim = old_embeddings.weight.size()
+        if old_num_tokens == new_num_tokens:
+            return old_embeddings
+
+        # Build new embeddings
         new_embeddings = nn.Embedding(new_num_tokens, old_embedding_dim)
         new_embeddings.to(old_embeddings.weight.device)
@@ -180,18 +198,29 @@ class PreTrainedModel(nn.Module):
         return new_embeddings

-    def resize_token_embeddings(self, new_num_tokens):
-        """ Resize input token embeddings matrix.
+    def _tie_or_clone_weights(self, first_module, second_module):
+        """ Tie or clone module weights depending of weither we are using TorchScript or not
+        """
+        if self.config.torchscript:
+            first_module.weight = nn.Parameter(second_module.weight.clone())
+        else:
+            first_module.weight = second_module.weight
+
+    def resize_token_embeddings(self, new_num_tokens=None):
+        """ Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
+
             Args:
-                new_num_tokens: New number of tokens in the embedding matrix.
+                new_num_tokens: (Optional) New number of tokens in the embedding matrix.
                     Increasing the size will add newly initialized vectors at the end
                     Reducing the size will remove vectors from the end
+                    If not provided or None: does nothing.
+
+            Return:
+                Pointer to the input tokens Embedding Module of the model
         """
-        if new_num_tokens == self.config.vocab_size:
-            return
-
         base_model = getattr(self, self.base_model_prefix, self)  # get the base model if needed
-        base_model._resize_token_embeddings(new_num_tokens)
+        model_embeds = base_model._resize_token_embeddings(new_num_tokens)
+        if new_num_tokens is None:
+            return model_embeds

         # Update base model and current model config
         self.config.vocab_size = new_num_tokens
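The new `_tie_or_clone_weights` helper is now the single place that decides between real weight sharing and cloning: in the normal case the output module simply reuses the embedding's `Parameter`, while the TorchScript path registers an independent copy because tracing cannot handle shared parameters. A toy illustration of the two behaviours with plain `torch.nn` modules (not the library's models):

import torch
import torch.nn as nn

embedding = nn.Embedding(10, 4)
decoder = nn.Linear(4, 10, bias=False)

# "tie": both modules now reference the very same Parameter object
decoder.weight = embedding.weight
assert decoder.weight is embedding.weight

# "clone" (TorchScript path): same values, but independent storage
decoder.weight = nn.Parameter(embedding.weight.clone())
assert decoder.weight is not embedding.weight
assert torch.equal(decoder.weight, embedding.weight)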
@@ -201,6 +230,8 @@ class PreTrainedModel(nn.Module):
         if hasattr(self, 'tie_weights'):
             self.tie_weights()

+        return model_embeds
+
     def prune_heads(self, heads_to_prune):
         """ Prunes heads of the base model.
             heads_to_prune: dict of {layer_num: list of heads to prune in this layer}
...
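From user code, the reworked `resize_token_embeddings` now returns the input embedding module and re-runs `tie_weights()` when the model defines it, so the LM decoder follows the resized matrix automatically. A rough usage sketch, assuming the usual `bert-base-uncased` checkpoint can be downloaded; the assertions just restate what the docstrings above promise:

from pytorch_transformers import BertForMaskedLM

# Hedged usage sketch: grow the input embedding matrix by 10 rows; the call
# returns the resized Embedding module and, because BertForMaskedLM defines
# tie_weights(), the MLM decoder is re-tied (or re-cloned) to the new matrix.
model = BertForMaskedLM.from_pretrained('bert-base-uncased')

new_embeddings = model.resize_token_embeddings(model.config.vocab_size + 10)

assert new_embeddings.weight.shape[0] == model.config.vocab_size
assert model.cls.predictions.decoder.weight.shape[0] == model.config.vocab_size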
@@ -184,6 +184,10 @@ class XLMConfig(PretrainedConfig):
     def vocab_size(self):
         return self.n_words

+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_words = value
+
     @property
     def hidden_size(self):
         return self.emb_dim
@@ -479,6 +483,7 @@ class XLMModel(XLMPreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.embeddings = self._get_resized_embeddings(self.embeddings, new_num_tokens)
+        return self.embeddings

     def _prune_heads(self, heads_to_prune):
         """ Prunes heads of the model.
@@ -728,10 +733,7 @@ class XLMWithLMHeadModel(XLMPreTrainedModel):
     def tie_weights(self):
         """ Make sure we are sharing the embeddings
         """
-        if self.config.torchscript:
-            self.pred_layer.proj.weight = nn.Parameter(self.transformer.embeddings.weight.clone())
-        else:
-            self.pred_layer.proj.weight = self.transformer.embeddings.weight
+        self._tie_or_clone_weights(self.pred_layer.proj, self.transformer.embeddings)

     def forward(self, input_ids, lengths=None, positions=None, langs=None, token_type_ids=None,
                 attention_mask=None, cache=None, labels=None, head_mask=None):
...
@@ -316,6 +316,10 @@ class XLNetConfig(PretrainedConfig):
     def vocab_size(self):
         return self.n_token

+    @vocab_size.setter
+    def vocab_size(self, value):
+        self.n_token = value
+
     @property
     def hidden_size(self):
         return self.d_model
@@ -660,10 +664,10 @@ class XLNetModel(XLNetPreTrainedModel):
     def _resize_token_embeddings(self, new_num_tokens):
         self.word_embedding = self._get_resized_embeddings(self.word_embedding, new_num_tokens)
+        return self.word_embedding

     def _prune_heads(self, heads_to_prune):
-        logger.info("Head pruning is not implemented for XLNet")
-        pass
+        raise NotImplementedError

     def create_mask(self, qlen, mlen):
         """
@@ -987,10 +991,7 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
     def tie_weights(self):
         """ Make sure we are sharing the embeddings
         """
-        if self.config.torchscript:
-            self.lm_loss.weight = nn.Parameter(self.transformer.word_embedding.weight.clone())
-        else:
-            self.lm_loss.weight = self.transformer.word_embedding.weight
+        self._tie_or_clone_weights(self.lm_loss, self.transformer.word_embedding)

     def forward(self, input_ids, token_type_ids=None, input_mask=None, attention_mask=None,
                 mems=None, perm_mask=None, target_mapping=None, inp_q=None,
...
@@ -26,10 +26,15 @@ from pytorch_transformers import (BertConfig, BertModel, BertForMaskedLM,
                                   BertForTokenClassification, BertForMultipleChoice)
 from pytorch_transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
-from .modeling_common_test import (create_and_check_commons, ConfigTester, ids_tensor)
+from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)


-class BertModelTest(unittest.TestCase):
+class BertModelTest(CommonTestCases.CommonModelTester):
+
+    all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
+                         BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
+                         BertForTokenClassification)

     class BertModelTester(object):

         def __init__(self,
@@ -55,9 +60,6 @@ class BertModelTest(unittest.TestCase):
                      num_labels=3,
                      num_choices=4,
                      scope=None,
-                     all_model_classes = (BertModel, BertForMaskedLM, BertForNextSentencePrediction,
-                                          BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
-                                          BertForTokenClassification),
                     ):
             self.parent = parent
             self.batch_size = batch_size
@@ -81,7 +83,6 @@ class BertModelTest(unittest.TestCase):
             self.num_labels = num_labels
             self.num_choices = num_choices
             self.scope = scope
-            self.all_model_classes = all_model_classes

         def prepare_config_and_inputs(self):
             input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -253,52 +254,59 @@ class BertModelTest(unittest.TestCase):
             self.check_loss_output(result)

-        def create_and_check_bert_commons(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
+        def prepare_config_and_inputs_for_common(self):
+            config_and_inputs = self.prepare_config_and_inputs()
+            (config, input_ids, token_type_ids, input_mask,
+             sequence_labels, token_labels, choice_labels) = config_and_inputs
             inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
-            create_and_check_commons(self, config, inputs_dict)
+            return config, inputs_dict

-    def test_default(self):
-        self.run_tester(BertModelTest.BertModelTester(self))
+    def setUp(self):
+        self.model_tester = BertModelTest.BertModelTester(self)
+        self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)

     def test_config(self):
-        config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
-        config_tester.run_common_tests()
+        self.config_tester.run_common_tests()

-    @pytest.mark.slow
-    def test_model_from_pretrained(self):
-        cache_dir = "/tmp/pytorch_transformers_test/"
-        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
-            model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
-            shutil.rmtree(cache_dir)
-            self.assertIsNotNone(model)
-
-    def run_tester(self, tester):
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_model(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_pretraining(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_question_answering(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_for_token_classification(*config_and_inputs)
-
-        config_and_inputs = tester.prepare_config_and_inputs()
-        tester.create_and_check_bert_commons(*config_and_inputs)
+    def test_bert_model(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_model(*config_and_inputs)
+
+    def test_for_masked_lm(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
+
+    def test_for_multiple_choice(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
+
+    def test_for_next_sequence_prediction(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
+
+    def test_for_pretraining(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)
+
+    def test_for_question_answering(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)
+
+    def test_for_sequence_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
+
+    def test_for_token_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)
+
+    @pytest.mark.slow
+    def test_model_from_pretrained(self):
+        cache_dir = "/tmp/pytorch_transformers_test/"
+        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
+            model = BertModel.from_pretrained(model_name, cache_dir=cache_dir)
+            shutil.rmtree(cache_dir)
+            self.assertIsNotNone(model)

 if __name__ == "__main__":
     unittest.main()
@@ -39,207 +39,471 @@ def _config_zero_init(config):
         setattr(configs_no_init, key, 0.0)
     return configs_no_init
def _create_and_check_torchscript_output_attentions(tester, model_classes, config, inputs_dict): class CommonTestCases:
config.output_attentions = True
_create_and_check_torchscript(tester, model_classes, config, inputs_dict) class CommonModelTester(unittest.TestCase):
def _create_and_check_torchscript_output_hidden_state(tester, model_classes, config, inputs_dict): model_tester = None
config.output_hidden_states = True all_model_classes = ()
_create_and_check_torchscript(tester, model_classes, config, inputs_dict) test_torchscript = True
test_pruning = True
def _create_and_check_torchscript(tester, model_classes, config, inputs_dict): test_resize_embeddings = True
configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True def test_initialization(self):
for model_class in model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
model = model_class(config=configs_no_init)
model.eval() configs_no_init = _config_zero_init(config)
inputs = inputs_dict['input_ids'] # Let's keep only input_ids for model_class in self.all_model_classes:
model = model_class(config=configs_no_init)
try: for name, param in model.named_parameters():
torch.jit.trace(model, inputs) if param.requires_grad:
except RuntimeError: self.assertIn(param.data.mean().item(), [0.0, 1.0],
tester.parent.fail("Couldn't trace module.") msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
try: def test_attention_outputs(self):
traced_gpt2 = torch.jit.trace(model, inputs) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
torch.jit.save(traced_gpt2, "traced_model.pt")
except RuntimeError: for model_class in self.all_model_classes:
tester.parent.fail("Couldn't save module.") config.output_attentions = True
config.output_hidden_states = False
try: model = model_class(config)
loaded_model = torch.jit.load("traced_model.pt") model.eval()
os.remove("traced_model.pt") outputs = model(**inputs_dict)
except ValueError: attentions = outputs[-1]
tester.parent.fail("Couldn't load module.") self.assertEqual(model.config.output_attentions, True)
self.assertEqual(model.config.output_hidden_states, False)
model.eval() self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
loaded_model.eval() self.assertListEqual(
list(attentions[0].shape[-3:]),
model_params = model.parameters() [self.model_tester.num_attention_heads,
loaded_model_params = loaded_model.parameters() self.model_tester.seq_length,
self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
models_equal = True out_len = len(outputs)
for p1, p2 in zip(model_params, loaded_model_params):
if p1.data.ne(p2.data).sum() > 0: # Check attention is always last and order is fine
models_equal = False config.output_attentions = True
config.output_hidden_states = True
tester.parent.assertTrue(models_equal) model = model_class(config)
model.eval()
def _create_and_check_initialization(tester, model_classes, config, inputs_dict): outputs = model(**inputs_dict)
configs_no_init = _config_zero_init(config) self.assertEqual(out_len+1, len(outputs))
for model_class in model_classes: self.assertEqual(model.config.output_attentions, True)
model = model_class(config=configs_no_init) self.assertEqual(model.config.output_hidden_states, True)
for name, param in model.named_parameters():
if param.requires_grad: attentions = outputs[-1]
tester.parent.assertIn(param.data.mean().item(), [0.0, 1.0], self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
msg="Parameter {} of model {} seems not properly initialized".format(name, model_class)) self.assertListEqual(
list(attentions[0].shape[-3:]),
def _create_and_check_for_headmasking(tester, model_classes, config, inputs_dict): [self.model_tester.num_attention_heads,
configs_no_init = _config_zero_init(config) # To be sure we have no Nan self.model_tester.seq_length,
for model_class in model_classes: self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
config.output_attentions = True
config.output_hidden_states = True def test_torchscript(self):
model = model_class(config=configs_no_init) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
model.eval()
self._create_and_check_torchscript(config, inputs_dict)
# Prepare head_mask
# Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior) def test_torchscript_output_attentions(self):
head_mask = torch.ones(tester.num_hidden_layers, tester.num_attention_heads) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
head_mask[0, 0] = 0
head_mask[-1, :-1] = 0 config.output_attentions = True
head_mask.requires_grad_(requires_grad=True) self._create_and_check_torchscript(config, inputs_dict)
inputs = inputs_dict.copy()
inputs['head_mask'] = head_mask def test_torchscript_output_hidden_state(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
outputs = model(**inputs)
config.output_hidden_states = True
# Test that we can get a gradient back for importance score computation self._create_and_check_torchscript(config, inputs_dict)
output = sum(t.sum() for t in outputs[0])
output = output.sum() def _create_and_check_torchscript(self, config, inputs_dict):
output.backward() if not self.test_torchscript:
multihead_outputs = head_mask.grad return
attentions = outputs[-1] configs_no_init = _config_zero_init(config) # To be sure we have no Nan
hidden_states = outputs[-2] configs_no_init.torchscript = True
for model_class in self.all_model_classes:
# Remove Nan model = model_class(config=configs_no_init)
model.eval()
tester.parent.assertIsNotNone(multihead_outputs) inputs = inputs_dict['input_ids'] # Let's keep only input_ids
tester.parent.assertEqual(len(multihead_outputs), tester.num_hidden_layers)
tester.parent.assertAlmostEqual( try:
attentions[0][..., 0, :, :].flatten().sum().item(), 0.0) torch.jit.trace(model, inputs)
tester.parent.assertNotEqual( except RuntimeError:
attentions[0][..., -1, :, :].flatten().sum().item(), 0.0) self.fail("Couldn't trace module.")
tester.parent.assertNotEqual(
attentions[1][..., 0, :, :].flatten().sum().item(), 0.0) try:
tester.parent.assertAlmostEqual( traced_gpt2 = torch.jit.trace(model, inputs)
attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0) torch.jit.save(traced_gpt2, "traced_model.pt")
tester.parent.assertNotEqual( except RuntimeError:
attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0) self.fail("Couldn't save module.")
try:
def _create_and_check_for_head_pruning(tester, model_classes, config, inputs_dict): loaded_model = torch.jit.load("traced_model.pt")
for model_class in model_classes: os.remove("traced_model.pt")
config.output_attentions = True except ValueError:
config.output_hidden_states = False self.fail("Couldn't load module.")
model = model_class(config=config)
model.eval() model.eval()
heads_to_prune = {0: list(range(1, tester.num_attention_heads)), loaded_model.eval()
-1: [0]}
model.prune_heads(heads_to_prune) model_params = model.parameters()
outputs = model(**inputs_dict) loaded_model_params = loaded_model.parameters()
attentions = outputs[-1] models_equal = True
for p1, p2 in zip(model_params, loaded_model_params):
tester.parent.assertEqual( if p1.data.ne(p2.data).sum() > 0:
attentions[0].shape[-3], 1) models_equal = False
tester.parent.assertEqual(
attentions[1].shape[-3], tester.num_attention_heads) self.assertTrue(models_equal)
tester.parent.assertEqual(
attentions[-1].shape[-3], tester.num_attention_heads - 1)
def test_headmasking(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def _create_and_check_for_attentions(tester, model_classes, config, inputs_dict):
for model_class in model_classes: config.output_attentions = True
config.output_attentions = True config.output_hidden_states = True
config.output_hidden_states = False configs_no_init = _config_zero_init(config) # To be sure we have no Nan
model = model_class(config) for model_class in self.all_model_classes:
model.eval() model = model_class(config=configs_no_init)
outputs = model(**inputs_dict) model.eval()
attentions = outputs[-1]
tester.parent.assertEqual(model.config.output_attentions, True) # Prepare head_mask
tester.parent.assertEqual(model.config.output_hidden_states, False) # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
tester.parent.assertEqual(len(attentions), tester.num_hidden_layers) head_mask = torch.ones(self.model_tester.num_hidden_layers, self.model_tester.num_attention_heads)
tester.parent.assertListEqual( head_mask[0, 0] = 0
list(attentions[0].shape[-3:]), head_mask[-1, :-1] = 0
[tester.num_attention_heads, head_mask.requires_grad_(requires_grad=True)
tester.seq_length, inputs = inputs_dict.copy()
tester.key_len if hasattr(tester, 'key_len') else tester.seq_length]) inputs['head_mask'] = head_mask
out_len = len(outputs)
outputs = model(**inputs)
# Check attention is always last and order is fine
config.output_attentions = True # Test that we can get a gradient back for importance score computation
config.output_hidden_states = True output = sum(t.sum() for t in outputs[0])
model = model_class(config) output = output.sum()
model.eval() output.backward()
outputs = model(**inputs_dict) multihead_outputs = head_mask.grad
tester.parent.assertEqual(out_len+1, len(outputs))
tester.parent.assertEqual(model.config.output_attentions, True) attentions = outputs[-1]
tester.parent.assertEqual(model.config.output_hidden_states, True) hidden_states = outputs[-2]
attentions = outputs[-1] # Remove Nan
tester.parent.assertEqual(len(attentions), tester.num_hidden_layers)
tester.parent.assertListEqual( self.assertIsNotNone(multihead_outputs)
list(attentions[0].shape[-3:]), self.assertEqual(len(multihead_outputs), self.model_tester.num_hidden_layers)
[tester.num_attention_heads, self.assertAlmostEqual(
tester.seq_length, attentions[0][..., 0, :, :].flatten().sum().item(), 0.0)
tester.key_len if hasattr(tester, 'key_len') else tester.seq_length]) self.assertNotEqual(
attentions[0][..., -1, :, :].flatten().sum().item(), 0.0)
def _create_and_check_for_hidden_states(tester, model_classes, config, inputs_dict): self.assertNotEqual(
for model_class in model_classes: attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
config.output_hidden_states = True self.assertAlmostEqual(
config.output_attentions = False attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0)
model = model_class(config) self.assertNotEqual(
model.eval() attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
outputs = model(**inputs_dict)
hidden_states = outputs[-1]
tester.parent.assertEqual(model.config.output_attentions, False) def test_head_pruning(self):
tester.parent.assertEqual(model.config.output_hidden_states, True) if not self.test_pruning:
tester.parent.assertEqual(len(hidden_states), tester.num_hidden_layers + 1) return
tester.parent.assertListEqual(
list(hidden_states[0].shape[-2:]), config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
[tester.seq_length, tester.hidden_size])
for model_class in self.all_model_classes:
config.output_attentions = True
def create_and_check_commons(tester, config, inputs_dict, test_pruning=True, test_torchscript=True): config.output_hidden_states = False
_create_and_check_initialization(tester, tester.all_model_classes, config, inputs_dict) model = model_class(config=config)
_create_and_check_for_attentions(tester, tester.all_model_classes, config, inputs_dict) model.eval()
_create_and_check_for_headmasking(tester, tester.all_model_classes, config, inputs_dict) heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)),
_create_and_check_for_hidden_states(tester, tester.all_model_classes, config, inputs_dict) -1: [0]}
model.prune_heads(heads_to_prune)
if test_torchscript: outputs = model(**inputs_dict)
_create_and_check_torchscript(tester, tester.all_model_classes, config, inputs_dict)
_create_and_check_torchscript_output_attentions(tester, tester.all_model_classes, config, inputs_dict) attentions = outputs[-1]
_create_and_check_torchscript_output_hidden_state(tester, tester.all_model_classes, config, inputs_dict)
self.assertEqual(
if test_pruning: attentions[0].shape[-3], 1)
_create_and_check_for_head_pruning(tester, tester.all_model_classes, config, inputs_dict) self.assertEqual(
attentions[1].shape[-3], self.model_tester.num_attention_heads)
self.assertEqual(
attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
def test_hidden_states_output(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
config.output_hidden_states = True
config.output_attentions = False
model = model_class(config)
model.eval()
outputs = model(**inputs_dict)
hidden_states = outputs[-1]
self.assertEqual(model.config.output_attentions, False)
self.assertEqual(model.config.output_hidden_states, True)
self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
self.assertListEqual(
list(hidden_states[0].shape[-2:]),
[self.model_tester.seq_length, self.model_tester.hidden_size])
def test_resize_tokens_embeddings(self):
original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
if not self.test_resize_embeddings:
return
for model_class in self.all_model_classes:
config = copy.deepcopy(original_config)
model = model_class(config)
model_vocab_size = config.vocab_size
# Retrieve the embeddings and clone theme
model_embed = model.resize_token_embeddings(model_vocab_size)
cloned_embeddings = model_embed.weight.clone()
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
model_embed = model.resize_token_embeddings(model_vocab_size + 10)
self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
# Check that it actually resizes the embeddings matrix
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
model_embed = model.resize_token_embeddings(model_vocab_size - 15)
self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
# Check that it actually resizes the embeddings matrix
self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
models_equal = True
for p1, p2 in zip(cloned_embeddings, model_embed.weight):
if p1.data.ne(p2.data).sum() > 0:
models_equal = False
self.assertTrue(models_equal)
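The loop above depends on the resize semantics documented in `_get_resized_embeddings`: growing appends freshly initialized rows at the end, shrinking removes rows from the end, and the leading rows are copied over unchanged. A standalone sketch of that copy step, using a hypothetical `resize_embedding` helper rather than the library code:

import torch
import torch.nn as nn

def resize_embedding(old, new_num_tokens):
    # Hypothetical re-implementation of the documented behaviour: copy the first
    # min(old, new) rows, leave any extra rows of the new matrix at their fresh init.
    old_num_tokens, dim = old.weight.size()
    new = nn.Embedding(new_num_tokens, dim)
    num_to_copy = min(old_num_tokens, new_num_tokens)
    new.weight.data[:num_to_copy, :] = old.weight.data[:num_to_copy, :]
    return new

old = nn.Embedding(99, 32)
grown = resize_embedding(old, 109)
shrunk = resize_embedding(old, 84)
assert torch.equal(grown.weight[:99], old.weight)   # prefix preserved when growing
assert torch.equal(shrunk.weight, old.weight[:84])  # truncated from the end when shrinking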
def test_tie_model_weights(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
def check_same_values(layer_1, layer_2):
equal = True
for p1, p2 in zip(layer_1.weight, layer_2.weight):
if p1.data.ne(p2.data).sum() > 0:
equal = False
return equal
for model_class in self.all_model_classes:
if not hasattr(model_class, 'tie_weights'):
continue
config.torchscript = True
model_not_tied = model_class(config)
params_not_tied = list(model_not_tied.parameters())
config_tied = copy.deepcopy(config)
config_tied.torchscript = False
model_tied = model_class(config_tied)
params_tied = list(model_tied.parameters())
# Check that the embedding layer and decoding layer are the same in size and in value
self.assertGreater(len(params_not_tied), len(params_tied))
# self.assertTrue(check_same_values(embeddings, decoding))
# # Check that after modification, they remain the same.
# embeddings.weight.data.div_(2)
# # Check that the embedding layer and decoding layer are the same in size and in value
# self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
# self.assertTrue(check_same_values(embeddings, decoding))
# # Check that after modification, they remain the same.
# decoding.weight.data.div_(4)
# # Check that the embedding layer and decoding layer are the same in size and in value
# self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
# self.assertTrue(check_same_values(embeddings, decoding))
# Check that after resize they remain tied.
model_tied.resize_token_embeddings(config.vocab_size + 10)
params_tied_2 = list(model_tied.parameters())
self.assertGreater(len(params_not_tied), len(params_tied))
self.assertEqual(len(params_tied_2), len(params_tied))
# decoding.weight.data.mul_(20)
# # Check that the embedding layer and decoding layer are the same in size and in value
# self.assertTrue(model.transformer.wte.weight.shape, model.lm_head.weight.shape)
# self.assertTrue(check_same_values(model.transformer.wte, model.lm_head))
class GPTModelTester(CommonModelTester):
def __init__(self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_position_ids=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
n_positions=33,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
n_choices=3,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
scope=None,
config_class=None,
base_model_class=None,
lm_head_model_class=None,
double_head_model_class=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_position_ids = use_position_ids
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.n_positions = n_positions
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.n_choices = n_choices
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.scope = scope
self.config_class = config_class
self.base_model_class = base_model_class
self.lm_head_model_class = lm_head_model_class
self.double_head_model_class = double_head_model_class
self.all_model_classes = (base_model_class, lm_head_model_class, double_head_model_class)
def prepare_config_and_inputs(self):
total_num_tokens = self.vocab_size
input_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_num_tokens)
position_ids = None
if self.use_position_ids:
position_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.n_positions)
token_type_ids = None
if self.use_token_type_ids:
total_voc = self.vocab_size
token_type_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)
mc_labels = None
lm_labels = None
mc_token_ids = None
if self.use_labels:
mc_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
lm_labels = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.num_labels)
mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length)
config = self.config_class(
vocab_size_or_config_json_file=self.vocab_size,
n_positions=self.n_positions,
n_embd=self.hidden_size,
n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads,
initializer_range=self.initializer_range)
return (config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids)
def create_and_check_base_model(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
model = self.base_model_class(config)
model.eval()
outputs = model(input_ids, position_ids, token_type_ids)
outputs = model(input_ids, position_ids)
outputs = model(input_ids)
def ids_tensor(shape, vocab_size, rng=None, name=None): hidden_state = outputs[0]
"""Creates a random int32 tensor of the shape within the vocab size.""" self.parent.assertListEqual(
if rng is None: list(hidden_state.size()),
rng = random.Random() [self.batch_size, self.n_choices, self.seq_length, self.hidden_size])
total_dims = 1
for dim in shape:
total_dims *= dim
values = [] def create_and_check_lm_head(self, config, input_ids, token_type_ids, position_ids,
for _ in range(total_dims): mc_labels, lm_labels, mc_token_ids):
values.append(rng.randint(0, vocab_size - 1)) model = self.lm_head_model_class(config)
model.eval()
outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
loss, lm_logits = outputs[:2]
return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous() total_voc = self.vocab_size
self.parent.assertListEqual(
list(lm_logits.size()),
[self.batch_size, self.n_choices, self.seq_length, total_voc])
self.parent.assertListEqual(
list(loss.size()),
[])
def create_and_check_presents(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
for model_class in self.all_model_classes:
model = model_class(config)
model.eval()
outputs = model(input_ids)
presents = outputs[-1]
self.parent.assertEqual(self.num_hidden_layers, len(presents))
self.parent.assertListEqual(
list(presents[0].size()),
[2, self.batch_size * self.n_choices, self.num_attention_heads,
self.seq_length, self.hidden_size // self.num_attention_heads])
def create_and_check_double_heads(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
model = self.double_head_model_class(config)
model.eval()
outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
token_type_ids=token_type_ids, position_ids=position_ids)
lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
loss = [lm_loss, mc_loss]
total_voc = self.vocab_size
self.parent.assertListEqual(
list(lm_logits.size()),
[self.batch_size, self.n_choices, self.seq_length, total_voc])
self.parent.assertListEqual(
list(mc_logits.size()),
[self.batch_size, self.n_choices])
self.parent.assertListEqual(
[list(l.size()) for l in loss],
[[], []])
def create_and_check_model_from_pretrained(self):
cache_dir = "/tmp/pytorch_transformers_test/"
for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:
model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir)
shutil.rmtree(cache_dir)
self.parent.assertIsNotNone(model)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids) = config_and_inputs
inputs_dict = {'input_ids': input_ids}
return config, inputs_dict
def run_common_tests(self, test_presents=False):
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_base_model(*config_and_inputs)
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_lm_head(*config_and_inputs)
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_double_heads(*config_and_inputs)
if test_presents:
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_presents(*config_and_inputs)
def run_slow_tests(self):
self.create_and_check_model_from_pretrained()
 class ConfigTester(object):

@@ -275,179 +539,22 @@ class ConfigTester(object):
         self.create_and_test_config_to_json_file()
class GPTModelTester(object):
def __init__(self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_position_ids=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
n_positions=33,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
n_choices=3,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
scope=None,
config_class=None,
base_model_class=None,
lm_head_model_class=None,
double_head_model_class=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_position_ids = use_position_ids
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.n_positions = n_positions
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.n_choices = n_choices
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.scope = scope
self.config_class = config_class
self.base_model_class = base_model_class
self.lm_head_model_class = lm_head_model_class
self.double_head_model_class = double_head_model_class
self.all_model_classes = (base_model_class, lm_head_model_class, double_head_model_class)
def prepare_config_and_inputs(self):
total_num_tokens = self.vocab_size
input_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_num_tokens)
position_ids = None def ids_tensor(shape, vocab_size, rng=None, name=None):
if self.use_position_ids: """Creates a random int32 tensor of the shape within the vocab size."""
position_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.n_positions) if rng is None:
rng = random.Random()
token_type_ids = None total_dims = 1
if self.use_token_type_ids: for dim in shape:
total_voc = self.vocab_size total_dims *= dim
token_type_ids = ids_tensor([self.batch_size, self.n_choices, self.seq_length], total_voc)
mc_labels = None
lm_labels = None
mc_token_ids = None
if self.use_labels:
mc_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
lm_labels = ids_tensor([self.batch_size, self.n_choices, self.seq_length], self.num_labels)
mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length)
config = self.config_class(
vocab_size_or_config_json_file=self.vocab_size,
n_positions=self.n_positions,
n_embd=self.hidden_size,
n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads,
initializer_range=self.initializer_range)
return (config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids)
def create_and_check_base_model(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
model = self.base_model_class(config)
model.eval()
outputs = model(input_ids, position_ids, token_type_ids)
outputs = model(input_ids, position_ids)
outputs = model(input_ids)
hidden_state = outputs[0]
self.parent.assertListEqual(
list(hidden_state.size()),
[self.batch_size, self.n_choices, self.seq_length, self.hidden_size])
def create_and_check_lm_head(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
model = self.lm_head_model_class(config)
model.eval()
outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
loss, lm_logits = outputs[:2]
total_voc = self.vocab_size
self.parent.assertListEqual(
list(lm_logits.size()),
[self.batch_size, self.n_choices, self.seq_length, total_voc])
self.parent.assertListEqual(
list(loss.size()),
[])
def create_and_check_presents(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
for model_class in self.all_model_classes:
model = model_class(config)
model.eval()
outputs = model(input_ids)
presents = outputs[-1]
self.parent.assertEqual(self.num_hidden_layers, len(presents))
self.parent.assertListEqual(
list(presents[0].size()),
[2, self.batch_size * self.n_choices, self.num_attention_heads,
self.seq_length, self.hidden_size // self.num_attention_heads])
def create_and_check_double_heads(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
model = self.double_head_model_class(config)
model.eval()
outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
token_type_ids=token_type_ids, position_ids=position_ids)
lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
loss = [lm_loss, mc_loss]
total_voc = self.vocab_size
self.parent.assertListEqual(
list(lm_logits.size()),
[self.batch_size, self.n_choices, self.seq_length, total_voc])
self.parent.assertListEqual(
list(mc_logits.size()),
[self.batch_size, self.n_choices])
self.parent.assertListEqual(
[list(l.size()) for l in loss],
[[], []])
def create_and_check_model_from_pretrained(self):
cache_dir = "/tmp/pytorch_transformers_test/"
for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:
model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir)
shutil.rmtree(cache_dir)
self.parent.assertIsNotNone(model)
def create_and_check_commons(self, config, input_ids, token_type_ids, position_ids,
mc_labels, lm_labels, mc_token_ids):
inputs_dict = {'input_ids': input_ids}
create_and_check_commons(self, config, inputs_dict)
def run_common_tests(self, test_presents=False):
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_base_model(*config_and_inputs)
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_lm_head(*config_and_inputs)
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_double_heads(*config_and_inputs)
if test_presents:
config_and_inputs = self.prepare_config_and_inputs()
self.create_and_check_presents(*config_and_inputs)
config_and_inputs = self.prepare_config_and_inputs() values = []
self.create_and_check_commons(*config_and_inputs) for _ in range(total_dims):
values.append(rng.randint(0, vocab_size - 1))
def run_slow_tests(self): return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()
self.create_and_check_model_from_pretrained()
 class ModelUtilsTest(unittest.TestCase):

@@ -471,79 +578,6 @@ class ModelUtilsTest(unittest.TestCase):
             self.assertEqual(model.config.output_hidden_states, True)
             self.assertEqual(model.config, config)
def test_resize_tokens_embeddings(self):
logging.basicConfig(level=logging.INFO)
for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
config = BertConfig.from_pretrained(model_name)
model = BertModel.from_pretrained(model_name)
model_vocab_size = config.vocab_size
# Retrieve the embeddings and clone theme
cloned_embeddings = model.embeddings.word_embeddings.weight.clone()
# Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
model.resize_token_embeddings(model_vocab_size + 10)
self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
# Check that it actually resizes the embeddings matrix
self.assertEqual(model.embeddings.word_embeddings.weight.shape[0], cloned_embeddings.shape[0] + 10)
# Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
model.resize_token_embeddings(model_vocab_size)
self.assertEqual(model.config.vocab_size, model_vocab_size)
# Check that it actually resizes the embeddings matrix
self.assertEqual(model.embeddings.word_embeddings.weight.shape[0], cloned_embeddings.shape[0])
# Check that adding and removing tokens has not modified the first part of the embedding matrix.
models_equal = True
for p1, p2 in zip(cloned_embeddings, model.embeddings.word_embeddings.weight):
if p1.data.ne(p2.data).sum() > 0:
models_equal = False
self.assertTrue(models_equal)
def test_tie_model_weights(self):
logging.basicConfig(level=logging.INFO)
def check_same_values(layer_1, layer_2):
equal = True
for p1, p2 in zip(layer_1.weight, layer_2.weight):
if p1.data.ne(p2.data).sum() > 0:
equal = False
return equal
for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
config = GPT2Config.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
# Get the embeddings and decoding layer
embeddings = model.transformer.wte
decoding = model.lm_head
# Check that the embedding layer and decoding layer are the same in size and in value
self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
self.assertTrue(check_same_values(embeddings, decoding))
# Check that after modification, they remain the same.
embeddings.weight.data.div_(2)
# Check that the embedding layer and decoding layer are the same in size and in value
self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
self.assertTrue(check_same_values(embeddings, decoding))
# Check that after modification, they remain the same.
decoding.weight.data.div_(4)
# Check that the embedding layer and decoding layer are the same in size and in value
self.assertTrue(embeddings.weight.shape, decoding.weight.shape)
self.assertTrue(check_same_values(embeddings, decoding))
# Check that after resize they remain tied.
model.resize_token_embeddings(config.vocab_size + 10)
decoding.weight.data.mul_(20)
# Check that the embedding layer and decoding layer are the same in size and in value
self.assertEqual(model.transformer.wte.weight.shape, model.lm_head.weight.shape)
self.assertTrue(check_same_values(model.transformer.wte, model.lm_head))
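The div_ and mul_ checks above pass because tying assigns the very same Parameter object to the embedding and the decoding layer, so any in-place edit is visible through either module. A standalone illustration with hypothetical layers, not the model under test:

import torch
import torch.nn as nn

embedding = nn.Embedding(10, 4)
decoder = nn.Linear(4, 10, bias=False)
decoder.weight = embedding.weight  # tie: both modules now share one Parameter

embedding.weight.data.div_(2)
assert torch.equal(embedding.weight, decoder.weight)  # still identical

decoder.weight.data.mul_(4)
assert torch.equal(embedding.weight, decoder.weight)  # still identical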
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -16,19 +16,14 @@ from __future__ import absolute_import ...@@ -16,19 +16,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import os
import unittest import unittest
import json
import random
import shutil
import pytest import pytest
import torch
from pytorch_transformers import (GPT2Config, GPT2Model, from pytorch_transformers import (GPT2Config, GPT2Model,
GPT2LMHeadModel, GPT2DoubleHeadsModel) GPT2LMHeadModel, GPT2DoubleHeadsModel)
from .modeling_common_test import (create_and_check_commons, ConfigTester, GPTModelTester)
from .modeling_common_test import CommonTestCases, ConfigTester
class GPT2ModelTest(unittest.TestCase): class GPT2ModelTest(unittest.TestCase):
...@@ -37,14 +32,14 @@ class GPT2ModelTest(unittest.TestCase): ...@@ -37,14 +32,14 @@ class GPT2ModelTest(unittest.TestCase):
config_tester.run_common_tests() config_tester.run_common_tests()
def test_model(self): def test_model(self):
model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
lm_head_model_class=GPT2LMHeadModel, lm_head_model_class=GPT2LMHeadModel,
double_head_model_class=GPT2DoubleHeadsModel) double_head_model_class=GPT2DoubleHeadsModel)
model_tester.run_common_tests(test_presents=True) model_tester.run_common_tests(test_presents=True)
@pytest.mark.slow @pytest.mark.slow
def test_pretrained(self): def test_pretrained(self):
model_tester = GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
model_tester = CommonTestCases.GPTModelTester(self, config_class=GPT2Config, base_model_class=GPT2Model,
lm_head_model_class=GPT2LMHeadModel, lm_head_model_class=GPT2LMHeadModel,
double_head_model_class=GPT2DoubleHeadsModel) double_head_model_class=GPT2DoubleHeadsModel)
model_tester.run_slow_tests() model_tester.run_slow_tests()
......
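The change from a module-level GPTModelTester import to CommonTestCases.GPTModelTester suggests the shared testers are now nested inside a container class in modeling_common_test.py, which is not shown in this diff. A rough, hypothetical sketch of that pattern:

class CommonTestCases(object):
    # Nesting the shared testers inside a plain container keeps the unittest
    # runner from collecting them as test cases of their own; concrete test
    # classes instantiate them explicitly, as GPT2ModelTest does above.
    class GPTModelTester(object):
        def __init__(self, parent, config_class, base_model_class,
                     lm_head_model_class, double_head_model_class):
            self.parent = parent
            self.config_class = config_class
            self.base_model_class = base_model_class
            self.lm_head_model_class = lm_head_model_class
            self.double_head_model_class = double_head_model_class

        def run_common_tests(self, test_presents=False):
            # Placeholder body: the real shared checks build small configs and
            # models from the classes passed in above.
            self.parent.assertIsNotNone(self.config_class)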
...@@ -19,12 +19,11 @@ from __future__ import print_function ...@@ -19,12 +19,11 @@ from __future__ import print_function
import unittest import unittest
import pytest import pytest
import torch
from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel, from pytorch_transformers import (OpenAIGPTConfig, OpenAIGPTModel,
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel)
from .modeling_common_test import (create_and_check_commons, ConfigTester, GPTModelTester)
from .modeling_common_test import CommonTestCases, ConfigTester
class OpenAIModelTest(unittest.TestCase): class OpenAIModelTest(unittest.TestCase):
...@@ -33,14 +32,14 @@ class OpenAIModelTest(unittest.TestCase): ...@@ -33,14 +32,14 @@ class OpenAIModelTest(unittest.TestCase):
config_tester.run_common_tests() config_tester.run_common_tests()
def test_model(self): def test_model(self):
model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
lm_head_model_class=OpenAIGPTLMHeadModel, lm_head_model_class=OpenAIGPTLMHeadModel,
double_head_model_class=OpenAIGPTDoubleHeadsModel) double_head_model_class=OpenAIGPTDoubleHeadsModel)
model_tester.run_common_tests(test_presents=False) model_tester.run_common_tests(test_presents=False)
@pytest.mark.slow @pytest.mark.slow
def test_pretrained(self): def test_pretrained(self):
model_tester = GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
model_tester = CommonTestCases.GPTModelTester(self, config_class=OpenAIGPTConfig, base_model_class=OpenAIGPTModel,
lm_head_model_class=OpenAIGPTLMHeadModel, lm_head_model_class=OpenAIGPTLMHeadModel,
double_head_model_class=OpenAIGPTDoubleHeadsModel) double_head_model_class=OpenAIGPTDoubleHeadsModel)
model_tester.run_slow_tests() model_tester.run_slow_tests()
......
...@@ -28,9 +28,15 @@ import torch ...@@ -28,9 +28,15 @@ import torch
from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel) from pytorch_transformers import (TransfoXLConfig, TransfoXLModel, TransfoXLLMHeadModel)
from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP from pytorch_transformers.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import ConfigTester, create_and_check_commons, ids_tensor
from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor
class TransfoXLModelTest(CommonTestCases.CommonModelTester):
all_model_classes = (TransfoXLModel, TransfoXLLMHeadModel)
test_pruning = False
test_torchscript = False
test_resize_embeddings = False
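The new class-level flags (test_pruning, test_torchscript, test_resize_embeddings) presumably let Transformer-XL opt out of shared tests it cannot support. A hypothetical sketch of how a base tester might honour such flags; the real CommonTestCases.CommonModelTester is not part of this diff:

import unittest

class CommonModelTesterSketch(unittest.TestCase):
    # Defaults that concrete subclasses such as TransfoXLModelTest can override.
    test_pruning = True
    test_torchscript = True
    test_resize_embeddings = True

    def test_head_pruning(self):
        if not self.test_pruning:
            self.skipTest("head pruning is not supported for this model")
        # the shared pruning checks would run here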
class TransfoXLModelTest(unittest.TestCase):
class TransfoXLModelTester(object): class TransfoXLModelTester(object):
def __init__(self, def __init__(self,
...@@ -52,7 +58,6 @@ class TransfoXLModelTest(unittest.TestCase): ...@@ -52,7 +58,6 @@ class TransfoXLModelTest(unittest.TestCase):
num_hidden_layers=5, num_hidden_layers=5,
scope=None, scope=None,
seed=1, seed=1,
all_model_classes=(TransfoXLModel, TransfoXLLMHeadModel),
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
...@@ -73,7 +78,6 @@ class TransfoXLModelTest(unittest.TestCase): ...@@ -73,7 +78,6 @@ class TransfoXLModelTest(unittest.TestCase):
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = num_hidden_layers
self.scope = scope self.scope = scope
self.seed = seed self.seed = seed
self.all_model_classes = all_model_classes
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -171,16 +175,31 @@ class TransfoXLModelTest(unittest.TestCase): ...@@ -171,16 +175,31 @@ class TransfoXLModelTest(unittest.TestCase):
list(list(mem.size()) for mem in result["mems_2"]), list(list(mem.size()) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
def create_and_check_transfo_xl_commons(self, config, input_ids_1, input_ids_2, lm_labels):
inputs_dict = {'input_ids': input_ids_1}
create_and_check_commons(self, config, inputs_dict, test_pruning=False, test_torchscript=False)
def test_default(self):
self.run_tester(TransfoXLModelTest.TransfoXLModelTester(self))
def test_config(self):
config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
config_tester.run_common_tests()

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids_1}
return config, inputs_dict
def setUp(self):
self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_transfo_xl_model(self):
self.model_tester.set_seed()
config_and_inputs = self.model_tester.prepare_config_and_inputs()
output_result = self.model_tester.create_transfo_xl_model(*config_and_inputs)
self.model_tester.check_transfo_xl_model_output(output_result)
def test_transfo_xl_lm_head(self):
self.model_tester.set_seed()
config_and_inputs = self.model_tester.prepare_config_and_inputs()
output_result = self.model_tester.create_transfo_xl_lm_head(*config_and_inputs)
self.model_tester.check_transfo_xl_lm_head_output(output_result)
@pytest.mark.slow @pytest.mark.slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
...@@ -190,23 +209,6 @@ class TransfoXLModelTest(unittest.TestCase): ...@@ -190,23 +209,6 @@ class TransfoXLModelTest(unittest.TestCase):
shutil.rmtree(cache_dir) shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
def run_tester(self, tester):
config_and_inputs = tester.prepare_config_and_inputs()
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
output_result = tester.create_transfo_xl_model(*config_and_inputs)
tester.check_transfo_xl_model_output(output_result)
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
output_result = tester.create_transfo_xl_lm_head(*config_and_inputs)
tester.check_transfo_xl_lm_head_output(output_result)
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_transfo_xl_commons(*config_and_inputs)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
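Each refactored tester now exposes prepare_config_and_inputs_for_common(), returning a (config, inputs_dict) pair that the shared tests can feed to every class listed in all_model_classes. A minimal sketch of that contract; the shared test bodies themselves are not shown in this diff, and the function name below is illustrative:

def run_shared_forward_check(tester, model_class):
    # The shared tests only need a config plus a dict of forward() keyword arguments.
    config, inputs_dict = tester.prepare_config_and_inputs_for_common()
    model = model_class(config)
    model.eval()
    outputs = model(**inputs_dict)
    assert outputs is not None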
...@@ -23,10 +23,15 @@ import pytest ...@@ -23,10 +23,15 @@ import pytest
from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering, XLMForSequenceClassification) from pytorch_transformers import (XLMConfig, XLMModel, XLMWithLMHeadModel, XLMForQuestionAnswering, XLMForSequenceClassification)
from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP from pytorch_transformers.modeling_xlm import XLM_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import (create_and_check_commons, ConfigTester, ids_tensor)
from .modeling_common_test import (CommonTestCases, ConfigTester, ids_tensor)
class XLMModelTest(unittest.TestCase):
class XLMModelTest(CommonTestCases.CommonModelTester):
all_model_classes = (XLMModel, XLMWithLMHeadModel,
XLMForQuestionAnswering, XLMForSequenceClassification)
# , XLMForSequenceClassification, XLMForTokenClassification),
class XLMModelTester(object): class XLMModelTester(object):
def __init__(self, def __init__(self,
...@@ -58,8 +63,6 @@ class XLMModelTest(unittest.TestCase): ...@@ -58,8 +63,6 @@ class XLMModelTest(unittest.TestCase):
summary_type="last", summary_type="last",
use_proj=True, use_proj=True,
scope=None, scope=None,
all_model_classes = (XLMModel, XLMWithLMHeadModel,
XLMForQuestionAnswering, XLMForSequenceClassification), # , XLMForSequenceClassification, XLMForTokenClassification),
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
...@@ -90,7 +93,6 @@ class XLMModelTest(unittest.TestCase): ...@@ -90,7 +93,6 @@ class XLMModelTest(unittest.TestCase):
self.num_labels = num_labels self.num_labels = num_labels
self.num_choices = num_choices self.num_choices = num_choices
self.scope = scope self.scope = scope
self.all_model_classes = all_model_classes
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -237,28 +239,23 @@ class XLMModelTest(unittest.TestCase): ...@@ -237,28 +239,23 @@ class XLMModelTest(unittest.TestCase):
[self.batch_size, self.type_sequence_label_size]) [self.batch_size, self.type_sequence_label_size])
def create_and_check_xlm_commons(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask):
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'lengths': input_lengths}
create_and_check_commons(self, config, inputs_dict)
def test_default(self):
self.run_tester(XLMModelTest.XLMModelTester(self))
def test_config(self):
config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
config_tester.run_common_tests()

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_lengths,
sequence_labels, token_labels, is_impossible_labels, input_mask) = config_and_inputs
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'lengths': input_lengths}
return config, inputs_dict
def setUp(self):
self.model_tester = XLMModelTest.XLMModelTester(self)
self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
def test_config(self):
self.config_tester.run_common_tests()
@pytest.mark.slow
def test_model_from_pretrained(self):
cache_dir = "/tmp/pytorch_transformers_test/"
for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model)
def run_tester(self, tester):
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_xlm_model(*config_and_inputs)

def test_xlm_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_xlm_model(*config_and_inputs)
# config_and_inputs = tester.prepare_config_and_inputs() # config_and_inputs = tester.prepare_config_and_inputs()
# tester.create_and_check_xlm_for_masked_lm(*config_and_inputs) # tester.create_and_check_xlm_for_masked_lm(*config_and_inputs)
...@@ -275,8 +272,14 @@ class XLMModelTest(unittest.TestCase): ...@@ -275,8 +272,14 @@ class XLMModelTest(unittest.TestCase):
# config_and_inputs = tester.prepare_config_and_inputs() # config_and_inputs = tester.prepare_config_and_inputs()
# tester.create_and_check_xlm_for_token_classification(*config_and_inputs) # tester.create_and_check_xlm_for_token_classification(*config_and_inputs)
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_xlm_commons(*config_and_inputs)

@pytest.mark.slow
def test_model_from_pretrained(self):
cache_dir = "/tmp/pytorch_transformers_test/"
for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -28,9 +28,14 @@ import torch ...@@ -28,9 +28,14 @@ import torch
from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering) from pytorch_transformers import (XLNetConfig, XLNetModel, XLNetLMHeadModel, XLNetForSequenceClassification, XLNetForQuestionAnswering)
from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP from pytorch_transformers.modeling_xlnet import XLNET_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import ConfigTester, create_and_check_commons, ids_tensor
from .modeling_common_test import ConfigTester, CommonTestCases, ids_tensor
class XLNetModelTest(CommonTestCases.CommonModelTester):
all_model_classes=(XLNetModel, XLNetLMHeadModel,
XLNetForSequenceClassification, XLNetForQuestionAnswering)
test_pruning = False
class XLNetModelTest(unittest.TestCase):
class XLNetModelTester(object): class XLNetModelTester(object):
def __init__(self, def __init__(self,
...@@ -56,8 +61,6 @@ class XLNetModelTest(unittest.TestCase): ...@@ -56,8 +61,6 @@ class XLNetModelTest(unittest.TestCase):
initializer_range=0.05, initializer_range=0.05,
seed=1, seed=1,
type_vocab_size=2, type_vocab_size=2,
all_model_classes=(XLNetModel, XLNetLMHeadModel,
XLNetForSequenceClassification, XLNetForQuestionAnswering),
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
...@@ -82,7 +85,6 @@ class XLNetModelTest(unittest.TestCase): ...@@ -82,7 +85,6 @@ class XLNetModelTest(unittest.TestCase):
self.seed = seed self.seed = seed
self.type_vocab_size = type_vocab_size self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size self.type_sequence_label_size = type_sequence_label_size
self.all_model_classes = all_model_classes
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -264,17 +266,41 @@ class XLNetModelTest(unittest.TestCase): ...@@ -264,17 +266,41 @@ class XLNetModelTest(unittest.TestCase):
list(list(mem.size()) for mem in result["mems_1"]), list(list(mem.size()) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers)
def create_and_check_xlnet_commons(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
target_mapping, inp_q, segment_ids, lm_labels, sequence_labels, is_impossible_labels):
inputs_dict = {'input_ids': input_ids_1}
create_and_check_commons(self, config, inputs_dict, test_pruning=False)
def test_default(self):
self.run_tester(XLNetModelTest.XLNetModelTester(self))
def test_config(self):
config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
config_tester.run_common_tests()

def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
target_mapping, inp_q, segment_ids, lm_labels,
sequence_labels, is_impossible_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids_1}
return config, inputs_dict
def setUp(self):
self.model_tester = XLNetModelTest.XLNetModelTester(self)
self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_xlnet_base_model(self):
self.model_tester.set_seed()
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_xlnet_base_model(*config_and_inputs)
def test_xlnet_lm_head(self):
self.model_tester.set_seed()
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_xlnet_lm_head(*config_and_inputs)
def test_xlnet_sequence_classif(self):
self.model_tester.set_seed()
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)
def test_xlnet_qa(self):
self.model_tester.set_seed()
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_xlnet_qa(*config_and_inputs)
@pytest.mark.slow @pytest.mark.slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
...@@ -284,27 +310,6 @@ class XLNetModelTest(unittest.TestCase): ...@@ -284,27 +310,6 @@ class XLNetModelTest(unittest.TestCase):
shutil.rmtree(cache_dir) shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
def run_tester(self, tester):
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_xlnet_base_model(*config_and_inputs)
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_xlnet_lm_head(*config_and_inputs)
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_xlnet_sequence_classif(*config_and_inputs)
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_xlnet_qa(*config_and_inputs)
tester.set_seed()
config_and_inputs = tester.prepare_config_and_inputs()
tester.create_and_check_xlnet_commons(*config_and_inputs)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()