Commit fa84ae26 authored by Aymeric Augustin
Browse files

Reformat source code with black.

This is the result of:

    $ black --line-length 119 examples templates transformers utils hubconf.py setup.py

There are a lot of fairly long lines in the project. As a consequence, I'm
picking the longest widely accepted line length, 119 characters.

This is also Thomas' preference, because it allows for explicit variable
names, to make the code easier to understand.
parent 63e3827c
...@@ -21,10 +21,9 @@ import pdb ...@@ -21,10 +21,9 @@ import pdb
from transformers import is_torch_available from transformers import is_torch_available
if is_torch_available(): if is_torch_available():
from transformers import (CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel
CTRLLMHeadModel)
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import CommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
...@@ -39,32 +38,32 @@ class CTRLModelTest(CommonTestCases.CommonModelTester): ...@@ -39,32 +38,32 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
test_head_masking = False test_head_masking = False
class CTRLModelTester(object): class CTRLModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -129,12 +128,20 @@ class CTRLModelTest(CommonTestCases.CommonModelTester): ...@@ -129,12 +128,20 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels return (
config,
input_ids,
input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
)
def check_loss_output(self, result): def check_loss_output(self, result):
self.parent.assertListEqual( self.parent.assertListEqual(list(result["loss"].size()), [])
list(result["loss"].size()),
[])
def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = CTRLModel(config=config) model = CTRLModel(config=config)
...@@ -150,8 +157,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester): ...@@ -150,8 +157,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
"presents": presents, "presents": presents,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].size()), list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertEqual(len(result["presents"]), config.n_layer) self.parent.assertEqual(len(result["presents"]), config.n_layer)
def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
...@@ -161,29 +168,28 @@ class CTRLModelTest(CommonTestCases.CommonModelTester): ...@@ -161,29 +168,28 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
result = { result = {"loss": loss, "lm_logits": lm_logits}
"loss": loss, self.parent.assertListEqual(list(result["loss"].size()), [])
"lm_logits": lm_logits
}
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["loss"].size()), list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
[]) )
self.parent.assertListEqual(
list(result["lm_logits"].size()),
[self.batch_size, self.seq_length, self.vocab_size])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, head_mask, token_type_ids, (
mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs config,
input_ids,
inputs_dict = { input_mask,
'input_ids': input_ids, head_mask,
'token_type_ids': token_type_ids, token_type_ids,
'head_mask': head_mask mc_token_ids,
} sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask}
return config, inputs_dict return config, inputs_dict
......
...@@ -21,11 +21,16 @@ import unittest ...@@ -21,11 +21,16 @@ import unittest
from transformers import is_torch_available from transformers import is_torch_available
if is_torch_available(): if is_torch_available():
from transformers import (DistilBertConfig, DistilBertModel, DistilBertForMaskedLM, from transformers import (
DistilBertForTokenClassification, DistilBertConfig,
DistilBertForQuestionAnswering, DistilBertForSequenceClassification) DistilBertModel,
DistilBertForMaskedLM,
from .modeling_common_test import (CommonTestCases, ids_tensor) DistilBertForTokenClassification,
DistilBertForQuestionAnswering,
DistilBertForSequenceClassification,
)
from .modeling_common_test import CommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
...@@ -33,39 +38,42 @@ from .utils import CACHE_DIR, require_torch, slow, torch_device ...@@ -33,39 +38,42 @@ from .utils import CACHE_DIR, require_torch, slow, torch_device
@require_torch @require_torch
class DistilBertModelTest(CommonTestCases.CommonModelTester): class DistilBertModelTest(CommonTestCases.CommonModelTester):
all_model_classes = (DistilBertModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering, all_model_classes = (
DistilBertForSequenceClassification) if is_torch_available() else None (DistilBertModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering, DistilBertForSequenceClassification)
if is_torch_available()
else None
)
test_pruning = True test_pruning = True
test_torchscript = True test_torchscript = True
test_resize_embeddings = True test_resize_embeddings = True
test_head_masking = True test_head_masking = True
class DistilBertModelTester(object): class DistilBertModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_token_type_ids=False, use_token_type_ids=False,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -114,16 +122,17 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -114,16 +122,17 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
dropout=self.hidden_dropout_prob, dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob, attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range) initializer_range=self.initializer_range,
)
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
def check_loss_output(self, result): def check_loss_output(self, result):
self.parent.assertListEqual( self.parent.assertListEqual(list(result["loss"].size()), [])
list(result["loss"].size()),
[])
def create_and_check_distilbert_model(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_model(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DistilBertModel(config=config) model = DistilBertModel(config=config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
...@@ -134,10 +143,12 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -134,10 +143,12 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
"sequence_output": sequence_output, "sequence_output": sequence_output,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].size()), list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_distilbert_for_masked_lm(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_masked_lm(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DistilBertForMaskedLM(config=config) model = DistilBertForMaskedLM(config=config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
...@@ -147,29 +158,31 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -147,29 +158,31 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
"prediction_scores": prediction_scores, "prediction_scores": prediction_scores,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].size()), list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
self.check_loss_output(result) self.check_loss_output(result)
def create_and_check_distilbert_for_question_answering(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_question_answering(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = DistilBertForQuestionAnswering(config=config) model = DistilBertForQuestionAnswering(config=config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
loss, start_logits, end_logits = model(input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels) loss, start_logits, end_logits = model(
input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels
)
result = { result = {
"loss": loss, "loss": loss,
"start_logits": start_logits, "start_logits": start_logits,
"end_logits": end_logits, "end_logits": end_logits,
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
list(result["start_logits"].size()), self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
[self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(result["end_logits"].size()),
[self.batch_size, self.seq_length])
self.check_loss_output(result) self.check_loss_output(result)
def create_and_check_distilbert_for_sequence_classification(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_sequence_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = DistilBertForSequenceClassification(config) model = DistilBertForSequenceClassification(config)
model.to(torch_device) model.to(torch_device)
...@@ -179,12 +192,12 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -179,12 +192,12 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
"loss": loss, "loss": loss,
"logits": logits, "logits": logits,
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
list(result["logits"].size()),
[self.batch_size, self.num_labels])
self.check_loss_output(result) self.check_loss_output(result)
def create_and_check_distilbert_for_token_classification(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_token_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = DistilBertForTokenClassification(config=config) model = DistilBertForTokenClassification(config=config)
model.to(torch_device) model.to(torch_device)
...@@ -196,14 +209,14 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -196,14 +209,14 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
"logits": logits, "logits": logits,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["logits"].size()), list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
[self.batch_size, self.seq_length, self.num_labels]) )
self.check_loss_output(result) self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids, 'attention_mask': input_mask} inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -239,5 +252,6 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -239,5 +252,6 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
# model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR) # model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# self.assertIsNotNone(model) # self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -39,13 +39,13 @@ class EncoderDecoderModelTest(unittest.TestCase): ...@@ -39,13 +39,13 @@ class EncoderDecoderModelTest(unittest.TestCase):
def test_model2model_from_pretrained_not_bert(self): def test_model2model_from_pretrained_not_bert(self):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
_ = Model2Model.from_pretrained('roberta') _ = Model2Model.from_pretrained("roberta")
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
_ = Model2Model.from_pretrained('distilbert') _ = Model2Model.from_pretrained("distilbert")
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
_ = Model2Model.from_pretrained('does-not-exist') _ = Model2Model.from_pretrained("does-not-exist")
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -21,10 +21,15 @@ import unittest ...@@ -21,10 +21,15 @@ import unittest
from transformers import is_torch_available from transformers import is_torch_available
if is_torch_available(): if is_torch_available():
from transformers import (GPT2Config, GPT2Model, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP, from transformers import (
GPT2LMHeadModel, GPT2DoubleHeadsModel) GPT2Config,
GPT2Model,
from .modeling_common_test import (CommonTestCases, ids_tensor) GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
GPT2LMHeadModel,
GPT2DoubleHeadsModel,
)
from .modeling_common_test import CommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
...@@ -35,32 +40,32 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester): ...@@ -35,32 +40,32 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else () all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
class GPT2ModelTester(object): class GPT2ModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -125,12 +130,20 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester): ...@@ -125,12 +130,20 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels return (
config,
input_ids,
input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
)
def check_loss_output(self, result): def check_loss_output(self, result):
self.parent.assertListEqual( self.parent.assertListEqual(list(result["loss"].size()), [])
list(result["loss"].size()),
[])
def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = GPT2Model(config=config) model = GPT2Model(config=config)
...@@ -146,8 +159,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester): ...@@ -146,8 +159,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
"presents": presents, "presents": presents,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].size()), list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertEqual(len(result["presents"]), config.n_layer) self.parent.assertEqual(len(result["presents"]), config.n_layer)
def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
...@@ -157,63 +170,58 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester): ...@@ -157,63 +170,58 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
result = { result = {"loss": loss, "lm_logits": lm_logits}
"loss": loss,
"lm_logits": lm_logits
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["loss"].size()), list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
[]) )
self.parent.assertListEqual(
list(result["lm_logits"].size()),
[self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_double_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args): def create_and_check_double_lm_head_model(
self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
):
model = GPT2DoubleHeadsModel(config) model = GPT2DoubleHeadsModel(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous() multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
inputs = {'input_ids': multiple_choice_inputs_ids, inputs = {
'mc_token_ids': mc_token_ids, "input_ids": multiple_choice_inputs_ids,
'attention_mask': multiple_choice_input_mask, "mc_token_ids": mc_token_ids,
'token_type_ids': multiple_choice_token_type_ids, "attention_mask": multiple_choice_input_mask,
'lm_labels': multiple_choice_inputs_ids} "token_type_ids": multiple_choice_token_type_ids,
"lm_labels": multiple_choice_inputs_ids,
}
loss, lm_logits, mc_logits, _ = model(**inputs) loss, lm_logits, mc_logits, _ = model(**inputs)
result = { result = {"loss": loss, "lm_logits": lm_logits, "mc_logits": mc_logits}
"loss": loss,
"lm_logits": lm_logits,
"mc_logits": mc_logits
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["loss"].size()), list(result["lm_logits"].size()), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
[]) )
self.parent.assertListEqual( self.parent.assertListEqual(list(result["mc_logits"].size()), [self.batch_size, self.num_choices])
list(result["lm_logits"].size()),
[self.batch_size, self.num_choices, self.seq_length, self.vocab_size])
self.parent.assertListEqual(
list(result["mc_logits"].size()),
[self.batch_size, self.num_choices])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, head_mask, token_type_ids, (
mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs config,
input_ids,
inputs_dict = { input_mask,
'input_ids': input_ids, head_mask,
'token_type_ids': token_type_ids, token_type_ids,
'head_mask': head_mask mc_token_ids,
} sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask}
return config, inputs_dict return config, inputs_dict
......
...@@ -21,10 +21,15 @@ import unittest ...@@ -21,10 +21,15 @@ import unittest
from transformers import is_torch_available from transformers import is_torch_available
if is_torch_available(): if is_torch_available():
from transformers import (OpenAIGPTConfig, OpenAIGPTModel, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP, from transformers import (
OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) OpenAIGPTConfig,
OpenAIGPTModel,
from .modeling_common_test import (CommonTestCases, ids_tensor) OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
OpenAIGPTLMHeadModel,
OpenAIGPTDoubleHeadsModel,
)
from .modeling_common_test import CommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
...@@ -32,33 +37,35 @@ from .utils import CACHE_DIR, require_torch, slow, torch_device ...@@ -32,33 +37,35 @@ from .utils import CACHE_DIR, require_torch, slow, torch_device
@require_torch @require_torch
class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
all_model_classes = (OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else () all_model_classes = (
(OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
)
class OpenAIGPTModelTester(object): class OpenAIGPTModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -116,9 +123,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): ...@@ -116,9 +123,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
return config, input_ids, head_mask, token_type_ids, sequence_labels, token_labels, choice_labels return config, input_ids, head_mask, token_type_ids, sequence_labels, token_labels, choice_labels
def check_loss_output(self, result): def check_loss_output(self, result):
self.parent.assertListEqual( self.parent.assertListEqual(list(result["loss"].size()), [])
list(result["loss"].size()),
[])
def create_and_check_openai_gpt_model(self, config, input_ids, head_mask, token_type_ids, *args): def create_and_check_openai_gpt_model(self, config, input_ids, head_mask, token_type_ids, *args):
model = OpenAIGPTModel(config=config) model = OpenAIGPTModel(config=config)
...@@ -129,12 +134,10 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): ...@@ -129,12 +134,10 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
model(input_ids, token_type_ids=token_type_ids) model(input_ids, token_type_ids=token_type_ids)
(sequence_output,) = model(input_ids) (sequence_output,) = model(input_ids)
result = { result = {"sequence_output": sequence_output}
"sequence_output": sequence_output
}
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].size()), list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args): def create_and_check_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args):
model = OpenAIGPTLMHeadModel(config) model = OpenAIGPTLMHeadModel(config)
...@@ -143,17 +146,12 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): ...@@ -143,17 +146,12 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
loss, lm_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids) loss, lm_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
result = { result = {"loss": loss, "lm_logits": lm_logits}
"loss": loss,
"lm_logits": lm_logits
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["loss"].size()), list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
[]) )
self.parent.assertListEqual(
list(result["lm_logits"].size()),
[self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_double_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args): def create_and_check_double_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args):
model = OpenAIGPTDoubleHeadsModel(config) model = OpenAIGPTDoubleHeadsModel(config)
...@@ -162,26 +160,25 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): ...@@ -162,26 +160,25 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, lm_labels=input_ids) loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, lm_labels=input_ids)
result = { result = {"loss": loss, "lm_logits": lm_logits}
"loss": loss,
"lm_logits": lm_logits
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["loss"].size()), list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
[]) )
self.parent.assertListEqual(
list(result["lm_logits"].size()),
[self.batch_size, self.seq_length, self.vocab_size])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, head_mask, token_type_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs (
inputs_dict = { config,
'input_ids': input_ids, input_ids,
'token_type_ids': token_type_ids, head_mask,
'head_mask': head_mask token_type_ids,
} sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask}
return config, inputs_dict return config, inputs_dict
......
...@@ -22,12 +22,17 @@ from transformers import is_torch_available ...@@ -22,12 +22,17 @@ from transformers import is_torch_available
if is_torch_available(): if is_torch_available():
import torch import torch
from transformers import (RobertaConfig, RobertaModel, RobertaForMaskedLM, from transformers import (
RobertaForSequenceClassification, RobertaForTokenClassification) RobertaConfig,
RobertaModel,
RobertaForMaskedLM,
RobertaForSequenceClassification,
RobertaForTokenClassification,
)
from transformers.modeling_roberta import RobertaEmbeddings from transformers.modeling_roberta import RobertaEmbeddings
from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import CommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
...@@ -38,31 +43,31 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): ...@@ -38,31 +43,31 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else () all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else ()
class RobertaModelTester(object): class RobertaModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_token_type_ids=True, use_token_type_ids=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -116,17 +121,17 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): ...@@ -116,17 +121,17 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
attention_probs_dropout_prob=self.attention_probs_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range) initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def check_loss_output(self, result): def check_loss_output(self, result):
self.parent.assertListEqual( self.parent.assertListEqual(list(result["loss"].size()), [])
list(result["loss"].size()),
[])
def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, def create_and_check_roberta_model(
token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = RobertaModel(config=config) model = RobertaModel(config=config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
...@@ -139,47 +144,59 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): ...@@ -139,47 +144,59 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
"pooled_output": pooled_output, "pooled_output": pooled_output,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].size()), list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size]) self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])
def create_and_check_roberta_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, def create_and_check_roberta_for_masked_lm(
token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = RobertaForMaskedLM(config=config) model = RobertaForMaskedLM(config=config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
loss, prediction_scores = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels) loss, prediction_scores = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
)
result = { result = {
"loss": loss, "loss": loss,
"prediction_scores": prediction_scores, "prediction_scores": prediction_scores,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].size()), list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
self.check_loss_output(result) self.check_loss_output(result)
def create_and_check_roberta_for_token_classification(self, config, input_ids, token_type_ids, input_mask, def create_and_check_roberta_for_token_classification(
sequence_labels, token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = RobertaForTokenClassification(config=config) model = RobertaForTokenClassification(config=config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, loss, logits = model(
labels=token_labels) input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
)
result = { result = {
"loss": loss, "loss": loss,
"logits": logits, "logits": logits,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["logits"].size()), list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
[self.batch_size, self.seq_length, self.num_labels]) )
self.check_loss_output(result) self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask, (
sequence_labels, token_labels, choice_labels) = config_and_inputs config,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -214,18 +231,12 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): ...@@ -214,18 +231,12 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
model = RobertaEmbeddings(config=config) model = RobertaEmbeddings(config=config)
input_ids = torch.as_tensor([[12, 31, 13, model.padding_idx]]) input_ids = torch.as_tensor([[12, 31, 13, model.padding_idx]])
expected_positions = torch.as_tensor([[ expected_positions = torch.as_tensor(
0 + model.padding_idx + 1, [[0 + model.padding_idx + 1, 1 + model.padding_idx + 1, 2 + model.padding_idx + 1, model.padding_idx]]
1 + model.padding_idx + 1, )
2 + model.padding_idx + 1,
model.padding_idx
]])
position_ids = model.create_position_ids_from_input_ids(input_ids) position_ids = model.create_position_ids_from_input_ids(input_ids)
self.assertEqual( self.assertEqual(position_ids.shape, expected_positions.shape)
position_ids.shape,
expected_positions.shape
)
self.assertTrue(torch.all(torch.eq(position_ids, expected_positions))) self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
def test_create_position_ids_from_inputs_embeds(self): def test_create_position_ids_from_inputs_embeds(self):
...@@ -247,69 +258,47 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): ...@@ -247,69 +258,47 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
] ]
expected_positions = torch.as_tensor([expected_single_positions, expected_single_positions]) expected_positions = torch.as_tensor([expected_single_positions, expected_single_positions])
position_ids = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds) position_ids = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds)
self.assertEqual( self.assertEqual(position_ids.shape, expected_positions.shape)
position_ids.shape, self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
expected_positions.shape
)
self.assertTrue(
torch.all(torch.eq(position_ids, expected_positions))
)
class RobertaModelIntegrationTest(unittest.TestCase): class RobertaModelIntegrationTest(unittest.TestCase):
@slow @slow
def test_inference_masked_lm(self): def test_inference_masked_lm(self):
model = RobertaForMaskedLM.from_pretrained('roberta-base') model = RobertaForMaskedLM.from_pretrained("roberta-base")
input_ids = torch.tensor([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
expected_shape = torch.Size((1, 11, 50265)) expected_shape = torch.Size((1, 11, 50265))
self.assertEqual( self.assertEqual(output.shape, expected_shape)
output.shape,
expected_shape
)
# compare the actual values for a slice. # compare the actual values for a slice.
expected_slice = torch.Tensor( expected_slice = torch.Tensor(
[[[33.8843, -4.3107, 22.7779], [[[33.8843, -4.3107, 22.7779], [4.6533, -2.8099, 13.6252], [1.8222, -3.6898, 8.8600]]]
[ 4.6533, -2.8099, 13.6252],
[ 1.8222, -3.6898, 8.8600]]]
)
self.assertTrue(
torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3)
) )
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
@slow @slow
def test_inference_no_head(self): def test_inference_no_head(self):
model = RobertaModel.from_pretrained('roberta-base') model = RobertaModel.from_pretrained("roberta-base")
input_ids = torch.tensor([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
# compare the actual values for a slice. # compare the actual values for a slice.
expected_slice = torch.Tensor( expected_slice = torch.Tensor(
[[[-0.0231, 0.0782, 0.0074], [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
[-0.1854, 0.0539, -0.0174],
[ 0.0548, 0.0799, 0.1687]]]
)
self.assertTrue(
torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3)
) )
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
@slow @slow
def test_inference_classification_head(self): def test_inference_classification_head(self):
model = RobertaForSequenceClassification.from_pretrained('roberta-large-mnli') model = RobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
input_ids = torch.tensor([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
expected_shape = torch.Size((1, 3)) expected_shape = torch.Size((1, 3))
self.assertEqual( self.assertEqual(output.shape, expected_shape)
output.shape, expected_tensor = torch.Tensor([[-0.9469, 0.3913, 0.5118]])
expected_shape self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-3))
)
expected_tensor = torch.Tensor([[-0.9469, 0.3913, 0.5118]])
self.assertTrue(
torch.allclose(output, expected_tensor, atol=1e-3)
)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -20,12 +20,12 @@ import unittest ...@@ -20,12 +20,12 @@ import unittest
from transformers import is_torch_available from transformers import is_torch_available
from .modeling_common_test import (CommonTestCases, ids_tensor, floats_tensor) from .modeling_common_test import CommonTestCases, ids_tensor, floats_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
if is_torch_available(): if is_torch_available():
from transformers import (T5Config, T5Model, T5WithLMHeadModel) from transformers import T5Config, T5Model, T5WithLMHeadModel
from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP
...@@ -39,26 +39,26 @@ class T5ModelTest(CommonTestCases.CommonModelTester): ...@@ -39,26 +39,26 @@ class T5ModelTest(CommonTestCases.CommonModelTester):
is_encoder_decoder = True is_encoder_decoder = True
class T5ModelTester(object): class T5ModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
encoder_seq_length=7, encoder_seq_length=7,
decoder_seq_length=9, decoder_seq_length=9,
is_training=True, is_training=True,
use_attention_mask=True, use_attention_mask=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
n_positions=14, n_positions=14,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
d_ff=37, d_ff=37,
relative_attention_num_buckets=8, relative_attention_num_buckets=8,
dropout_rate=0.1, dropout_rate=0.1,
initializer_factor=0.002, initializer_factor=0.002,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.encoder_seq_length = encoder_seq_length self.encoder_seq_length = encoder_seq_length
...@@ -101,60 +101,96 @@ class T5ModelTest(CommonTestCases.CommonModelTester): ...@@ -101,60 +101,96 @@ class T5ModelTest(CommonTestCases.CommonModelTester):
num_heads=self.num_attention_heads, num_heads=self.num_attention_heads,
relative_attention_num_buckets=self.relative_attention_num_buckets, relative_attention_num_buckets=self.relative_attention_num_buckets,
dropout_rate=self.dropout_rate, dropout_rate=self.dropout_rate,
initializer_factor=self.initializer_factor) initializer_factor=self.initializer_factor,
)
return (config, encoder_input_ids, decoder_input_ids, encoder_attention_mask, decoder_attention_mask, decoder_lm_labels)
return (
config,
encoder_input_ids,
decoder_input_ids,
encoder_attention_mask,
decoder_attention_mask,
decoder_lm_labels,
)
def check_loss_output(self, result): def check_loss_output(self, result):
self.parent.assertListEqual( self.parent.assertListEqual(list(result["loss"].size()), [])
list(result["loss"].size()),
[]) def create_and_check_t5_model(
self,
def create_and_check_t5_model(self, config, encoder_input_ids, decoder_input_ids, encoder_attention_mask, decoder_attention_mask, decoder_lm_labels): config,
encoder_input_ids,
decoder_input_ids,
encoder_attention_mask,
decoder_attention_mask,
decoder_lm_labels,
):
model = T5Model(config=config) model = T5Model(config=config)
model.eval() model.eval()
decoder_output, encoder_output = model(encoder_input_ids=encoder_input_ids, decoder_output, encoder_output = model(
decoder_input_ids=decoder_input_ids, encoder_input_ids=encoder_input_ids,
encoder_attention_mask=encoder_attention_mask, decoder_input_ids=decoder_input_ids,
decoder_attention_mask=decoder_attention_mask) encoder_attention_mask=encoder_attention_mask,
decoder_output, encoder_output = model(encoder_input_ids=encoder_input_ids, decoder_attention_mask=decoder_attention_mask,
decoder_input_ids=decoder_input_ids) )
decoder_output, encoder_output = model(
encoder_input_ids=encoder_input_ids, decoder_input_ids=decoder_input_ids
)
result = { result = {
"encoder_output": encoder_output, "encoder_output": encoder_output,
"decoder_output": decoder_output, "decoder_output": decoder_output,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["encoder_output"].size()), list(result["encoder_output"].size()), [self.batch_size, self.encoder_seq_length, self.hidden_size]
[self.batch_size, self.encoder_seq_length, self.hidden_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["decoder_output"].size()), list(result["decoder_output"].size()), [self.batch_size, self.decoder_seq_length, self.hidden_size]
[self.batch_size, self.decoder_seq_length, self.hidden_size]) )
def create_and_check_t5_with_lm_head(
def create_and_check_t5_with_lm_head(self, config, encoder_input_ids, decoder_input_ids, encoder_attention_mask, decoder_attention_mask, decoder_lm_labels): self,
config,
encoder_input_ids,
decoder_input_ids,
encoder_attention_mask,
decoder_attention_mask,
decoder_lm_labels,
):
model = T5WithLMHeadModel(config=config) model = T5WithLMHeadModel(config=config)
model.eval() model.eval()
outputs = model(encoder_input_ids=encoder_input_ids, decoder_input_ids=decoder_input_ids, outputs = model(
decoder_attention_mask=decoder_attention_mask, decoder_lm_labels=decoder_lm_labels) encoder_input_ids=encoder_input_ids,
decoder_input_ids=decoder_input_ids,
decoder_attention_mask=decoder_attention_mask,
decoder_lm_labels=decoder_lm_labels,
)
loss, prediction_scores = outputs[0], outputs[1] loss, prediction_scores = outputs[0], outputs[1]
result = { result = {
"loss": loss, "loss": loss,
"prediction_scores": prediction_scores, "prediction_scores": prediction_scores,
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].size()), list(result["prediction_scores"].size()), [self.batch_size, self.decoder_seq_length, self.vocab_size]
[self.batch_size, self.decoder_seq_length, self.vocab_size]) )
self.check_loss_output(result) self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, encoder_input_ids, decoder_input_ids, encoder_attention_mask, (
decoder_attention_mask, decoder_lm_labels) = config_and_inputs config,
inputs_dict = {'encoder_input_ids': encoder_input_ids, encoder_input_ids,
'decoder_input_ids': decoder_input_ids, decoder_input_ids,
'decoder_attention_mask': decoder_attention_mask, encoder_attention_mask,
'encoder_attention_mask': encoder_attention_mask} decoder_attention_mask,
decoder_lm_labels,
) = config_and_inputs
inputs_dict = {
"encoder_input_ids": encoder_input_ids,
"decoder_input_ids": decoder_input_ids,
"decoder_attention_mask": decoder_attention_mask,
"encoder_attention_mask": encoder_attention_mask,
}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -178,5 +214,6 @@ class T5ModelTest(CommonTestCases.CommonModelTester): ...@@ -178,5 +214,6 @@ class T5ModelTest(CommonTestCases.CommonModelTester):
model = T5Model.from_pretrained(model_name, cache_dir=CACHE_DIR) model = T5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import unittest import unittest
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,47 +27,48 @@ from transformers import AlbertConfig, is_tf_available ...@@ -27,47 +27,48 @@ from transformers import AlbertConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_albert import (TFAlbertModel, TFAlbertForMaskedLM, from transformers.modeling_tf_albert import (
TFAlbertForSequenceClassification, TFAlbertModel,
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP) TFAlbertForMaskedLM,
TFAlbertForSequenceClassification,
TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = ( all_model_classes = (
TFAlbertModel, (TFAlbertModel, TFAlbertForMaskedLM, TFAlbertForSequenceClassification) if is_tf_available() else ()
TFAlbertForMaskedLM, )
TFAlbertForSequenceClassification
) if is_tf_available() else ()
class TFAlbertModelTester(object): class TFAlbertModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_token_type_ids=True, use_token_type_ids=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
embedding_size=16, embedding_size=16,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -93,27 +94,22 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -93,27 +94,22 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
self.scope = scope self.scope = scope
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids = ids_tensor( input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
[self.batch_size, self.seq_length], self.vocab_size)
input_mask = None input_mask = None
if self.use_input_mask: if self.use_input_mask:
input_mask = ids_tensor( input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
[self.batch_size, self.seq_length], vocab_size=2)
token_type_ids = None token_type_ids = None
if self.use_token_type_ids: if self.use_token_type_ids:
token_type_ids = ids_tensor( token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
[self.batch_size, self.seq_length], self.type_vocab_size)
sequence_labels = None sequence_labels = None
token_labels = None token_labels = None
choice_labels = None choice_labels = None
if self.use_labels: if self.use_labels:
sequence_labels = ids_tensor( sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
[self.batch_size], self.type_sequence_label_size) token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
token_labels = ids_tensor(
[self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = AlbertConfig( config = AlbertConfig(
...@@ -127,19 +123,20 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -127,19 +123,20 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
attention_probs_dropout_prob=self.attention_probs_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range) initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_and_check_albert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_albert_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFAlbertModel(config=config) model = TFAlbertModel(config=config)
# inputs = {'input_ids': input_ids, # inputs = {'input_ids': input_ids,
# 'attention_mask': input_mask, # 'attention_mask': input_mask,
# 'token_type_ids': token_type_ids} # 'token_type_ids': token_type_ids}
# sequence_output, pooled_output = model(**inputs) # sequence_output, pooled_output = model(**inputs)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output, pooled_output = model(inputs) sequence_output, pooled_output = model(inputs)
inputs = [input_ids, input_mask] inputs = [input_ids, input_mask]
...@@ -152,50 +149,52 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -152,50 +149,52 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
"pooled_output": pooled_output.numpy(), "pooled_output": pooled_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertListEqual(list(result["pooled_output"].shape), [ self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
self.batch_size, self.hidden_size])
def create_and_check_albert_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_albert_for_masked_lm(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFAlbertForMaskedLM(config=config) model = TFAlbertForMaskedLM(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (prediction_scores,) = model(inputs)
'token_type_ids': token_type_ids}
prediction_scores, = model(inputs)
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_albert_for_sequence_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_albert_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = TFAlbertForSequenceClassification(config=config) model = TFAlbertForSequenceClassification(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (logits,) = model(inputs)
'token_type_ids': token_type_ids}
logits, = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
list(result["logits"].shape),
[self.batch_size, self.num_labels])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask, (
sequence_labels, token_labels, choice_labels) = config_and_inputs config,
inputs_dict = {'input_ids': input_ids, input_ids,
'token_type_ids': token_type_ids, 'attention_mask': input_mask} token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
self.model_tester = TFAlbertModelTest.TFAlbertModelTester(self) self.model_tester = TFAlbertModelTest.TFAlbertModelTester(self)
self.config_tester = ConfigTester( self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)
self, config_class=AlbertConfig, hidden_size=37)
def test_config(self): def test_config(self):
self.config_tester.run_common_tests() self.config_tester.run_common_tests()
...@@ -206,13 +205,11 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -206,13 +205,11 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
def test_for_masked_lm(self): def test_for_masked_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_albert_for_masked_lm( self.model_tester.create_and_check_albert_for_masked_lm(*config_and_inputs)
*config_and_inputs)
def test_for_sequence_classification(self): def test_for_sequence_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_albert_for_sequence_classification( self.model_tester.create_and_check_albert_for_sequence_classification(*config_and_inputs)
*config_and_inputs)
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
......
...@@ -25,14 +25,21 @@ from transformers import is_tf_available ...@@ -25,14 +25,21 @@ from transformers import is_tf_available
from .utils import require_tf, slow, SMALL_MODEL_IDENTIFIER from .utils import require_tf, slow, SMALL_MODEL_IDENTIFIER
if is_tf_available(): if is_tf_available():
from transformers import (AutoConfig, BertConfig, from transformers import (
TFAutoModel, TFBertModel, AutoConfig,
TFAutoModelWithLMHead, TFBertForMaskedLM, BertConfig,
TFAutoModelForSequenceClassification, TFBertForSequenceClassification, TFAutoModel,
TFAutoModelForQuestionAnswering, TFBertForQuestionAnswering) TFBertModel,
TFAutoModelWithLMHead,
TFBertForMaskedLM,
TFAutoModelForSequenceClassification,
TFBertForSequenceClassification,
TFAutoModelForQuestionAnswering,
TFBertForQuestionAnswering,
)
from transformers.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP from transformers.modeling_tf_bert import TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import CommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
...@@ -41,11 +48,12 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -41,11 +48,12 @@ class TFAutoModelTest(unittest.TestCase):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
import h5py import h5py
self.assertTrue(h5py.version.hdf5_version.startswith("1.10")) self.assertTrue(h5py.version.hdf5_version.startswith("1.10"))
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
...@@ -58,7 +66,7 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -58,7 +66,7 @@ class TFAutoModelTest(unittest.TestCase):
def test_lmhead_model_from_pretrained(self): def test_lmhead_model_from_pretrained(self):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
...@@ -71,7 +79,7 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -71,7 +79,7 @@ class TFAutoModelTest(unittest.TestCase):
def test_sequence_classification_model_from_pretrained(self): def test_sequence_classification_model_from_pretrained(self):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
...@@ -84,7 +92,7 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -84,7 +92,7 @@ class TFAutoModelTest(unittest.TestCase):
def test_question_answering_model_from_pretrained(self): def test_question_answering_model_from_pretrained(self):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ["bert-base-uncased"]:
config = AutoConfig.from_pretrained(model_name) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
......
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import unittest import unittest
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,49 +27,62 @@ from transformers import BertConfig, is_tf_available ...@@ -27,49 +27,62 @@ from transformers import BertConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_bert import (TFBertModel, TFBertForMaskedLM, from transformers.modeling_tf_bert import (
TFBertForNextSentencePrediction, TFBertModel,
TFBertForPreTraining, TFBertForMaskedLM,
TFBertForSequenceClassification, TFBertForNextSentencePrediction,
TFBertForMultipleChoice, TFBertForPreTraining,
TFBertForTokenClassification, TFBertForSequenceClassification,
TFBertForQuestionAnswering, TFBertForMultipleChoice,
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP) TFBertForTokenClassification,
TFBertForQuestionAnswering,
TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFBertModelTest(TFCommonTestCases.TFCommonModelTester): class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFBertModel, TFBertForMaskedLM, TFBertForNextSentencePrediction, all_model_classes = (
TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification, (
TFBertForTokenClassification) if is_tf_available() else () TFBertModel,
TFBertForMaskedLM,
TFBertForNextSentencePrediction,
TFBertForPreTraining,
TFBertForQuestionAnswering,
TFBertForSequenceClassification,
TFBertForTokenClassification,
)
if is_tf_available()
else ()
)
class TFBertModelTester(object): class TFBertModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_token_type_ids=True, use_token_type_ids=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -123,15 +136,16 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -123,15 +136,16 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
attention_probs_dropout_prob=self.attention_probs_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range) initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_and_check_bert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_bert_model(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFBertModel(config=config) model = TFBertModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output, pooled_output = model(inputs) sequence_output, pooled_output = model(inputs)
inputs = [input_ids, input_mask] inputs = [input_ids, input_mask]
...@@ -144,128 +158,119 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -144,128 +158,119 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
"pooled_output": pooled_output.numpy(), "pooled_output": pooled_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size]) self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
def create_and_check_bert_for_masked_lm(
def create_and_check_bert_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFBertForMaskedLM(config=config) model = TFBertForMaskedLM(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (prediction_scores,) = model(inputs)
'token_type_ids': token_type_ids}
prediction_scores, = model(inputs)
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_bert_for_next_sequence_prediction(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_bert_for_next_sequence_prediction(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFBertForNextSentencePrediction(config=config) model = TFBertForNextSentencePrediction(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (seq_relationship_score,) = model(inputs)
'token_type_ids': token_type_ids}
seq_relationship_score, = model(inputs)
result = { result = {
"seq_relationship_score": seq_relationship_score.numpy(), "seq_relationship_score": seq_relationship_score.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
list(result["seq_relationship_score"].shape),
[self.batch_size, 2])
def create_and_check_bert_for_pretraining(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_bert_for_pretraining(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFBertForPreTraining(config=config) model = TFBertForPreTraining(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
prediction_scores, seq_relationship_score = model(inputs) prediction_scores, seq_relationship_score = model(inputs)
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
"seq_relationship_score": seq_relationship_score.numpy(), "seq_relationship_score": seq_relationship_score.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])
list(result["seq_relationship_score"].shape),
[self.batch_size, 2])
def create_and_check_bert_for_sequence_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_bert_for_sequence_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = TFBertForSequenceClassification(config=config) model = TFBertForSequenceClassification(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (logits,) = model(inputs)
'token_type_ids': token_type_ids}
logits, = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
list(result["logits"].shape),
[self.batch_size, self.num_labels])
def create_and_check_bert_for_multiple_choice(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_bert_for_multiple_choice(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_choices = self.num_choices config.num_choices = self.num_choices
model = TFBertForMultipleChoice(config=config) model = TFBertForMultipleChoice(config=config)
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
inputs = {'input_ids': multiple_choice_inputs_ids, inputs = {
'attention_mask': multiple_choice_input_mask, "input_ids": multiple_choice_inputs_ids,
'token_type_ids': multiple_choice_token_type_ids} "attention_mask": multiple_choice_input_mask,
logits, = model(inputs) "token_type_ids": multiple_choice_token_type_ids,
}
(logits,) = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
list(result["logits"].shape),
[self.batch_size, self.num_choices])
def create_and_check_bert_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_bert_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = TFBertForTokenClassification(config=config) model = TFBertForTokenClassification(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (logits,) = model(inputs)
'token_type_ids': token_type_ids}
logits, = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["logits"].shape), list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
[self.batch_size, self.seq_length, self.num_labels]) )
def create_and_check_bert_for_question_answering(
def create_and_check_bert_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFBertForQuestionAnswering(config=config) model = TFBertForQuestionAnswering(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
start_logits, end_logits = model(inputs) start_logits, end_logits = model(inputs)
result = { result = {
"start_logits": start_logits.numpy(), "start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(), "end_logits": end_logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
list(result["start_logits"].shape), self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
[self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(result["end_logits"].shape),
[self.batch_size, self.seq_length])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask, (
sequence_labels, token_labels, choice_labels) = config_and_inputs config,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -310,10 +315,10 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -310,10 +315,10 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ["bert-base-uncased"]:
model = TFBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR) model = TFBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -35,6 +35,7 @@ if is_tf_available(): ...@@ -35,6 +35,7 @@ if is_tf_available():
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
from transformers import TFPreTrainedModel from transformers import TFPreTrainedModel
# from transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP # from transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
if sys.version_info[0] == 2: if sys.version_info[0] == 2:
...@@ -42,25 +43,31 @@ if sys.version_info[0] == 2: ...@@ -42,25 +43,31 @@ if sys.version_info[0] == 2:
class TemporaryDirectory(object): class TemporaryDirectory(object):
"""Context manager for tempfile.mkdtemp() so it's usable with "with" statement.""" """Context manager for tempfile.mkdtemp() so it's usable with "with" statement."""
def __enter__(self): def __enter__(self):
self.name = tempfile.mkdtemp() self.name = tempfile.mkdtemp()
return self.name return self.name
def __exit__(self, exc_type, exc_value, traceback): def __exit__(self, exc_type, exc_value, traceback):
shutil.rmtree(self.name) shutil.rmtree(self.name)
else: else:
import pickle import pickle
TemporaryDirectory = tempfile.TemporaryDirectory TemporaryDirectory = tempfile.TemporaryDirectory
unicode = str unicode = str
def _config_zero_init(config): def _config_zero_init(config):
configs_no_init = copy.deepcopy(config) configs_no_init = copy.deepcopy(config)
for key in configs_no_init.__dict__.keys(): for key in configs_no_init.__dict__.keys():
if '_range' in key or '_std' in key: if "_range" in key or "_std" in key:
setattr(configs_no_init, key, 0.0) setattr(configs_no_init, key, 0.0)
return configs_no_init return configs_no_init
class TFCommonTestCases:
class TFCommonTestCases:
@require_tf @require_tf
class TFCommonModelTester(unittest.TestCase): class TFCommonModelTester(unittest.TestCase):
...@@ -126,8 +133,9 @@ class TFCommonTestCases: ...@@ -126,8 +133,9 @@ class TFCommonTestCases:
# Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
pt_model.eval() pt_model.eval()
pt_inputs_dict = dict((name, torch.from_numpy(key.numpy()).to(torch.long)) pt_inputs_dict = dict(
for name, key in inputs_dict.items()) (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
)
with torch.no_grad(): with torch.no_grad():
pto = pt_model(**pt_inputs_dict) pto = pt_model(**pt_inputs_dict)
tfo = tf_model(inputs_dict, training=False) tfo = tf_model(inputs_dict, training=False)
...@@ -140,18 +148,19 @@ class TFCommonTestCases: ...@@ -140,18 +148,19 @@ class TFCommonTestCases:
# Check we can load pt model in tf and vice-versa with checkpoint => model functions # Check we can load pt model in tf and vice-versa with checkpoint => model functions
with TemporaryDirectory() as tmpdirname: with TemporaryDirectory() as tmpdirname:
pt_checkpoint_path = os.path.join(tmpdirname, 'pt_model.bin') pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
torch.save(pt_model.state_dict(), pt_checkpoint_path) torch.save(pt_model.state_dict(), pt_checkpoint_path)
tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path) tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)
tf_checkpoint_path = os.path.join(tmpdirname, 'tf_model.h5') tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
tf_model.save_weights(tf_checkpoint_path) tf_model.save_weights(tf_checkpoint_path)
pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path) pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)
# Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences # Check predictions on first output (logits/hidden-states) are close enought given low-level computational differences
pt_model.eval() pt_model.eval()
pt_inputs_dict = dict((name, torch.from_numpy(key.numpy()).to(torch.long)) pt_inputs_dict = dict(
for name, key in inputs_dict.items()) (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
)
with torch.no_grad(): with torch.no_grad():
pto = pt_model(**pt_inputs_dict) pto = pt_model(**pt_inputs_dict)
tfo = tf_model(inputs_dict) tfo = tf_model(inputs_dict)
...@@ -166,13 +175,19 @@ class TFCommonTestCases: ...@@ -166,13 +175,19 @@ class TFCommonTestCases:
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
if self.is_encoder_decoder: if self.is_encoder_decoder:
input_ids = {'decoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='decoder_input_ids', dtype='int32'), input_ids = {
'encoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='encoder_input_ids', dtype='int32')} "decoder_input_ids": tf.keras.Input(
batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"
),
"encoder_input_ids": tf.keras.Input(
batch_shape=(2, 2000), name="encoder_input_ids", dtype="int32"
),
}
else: else:
input_ids = tf.keras.Input(batch_shape=(2, 2000), name='input_ids', dtype='int32') input_ids = tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32")
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0) optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy') metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
# Prepare our model # Prepare our model
...@@ -188,7 +203,7 @@ class TFCommonTestCases: ...@@ -188,7 +203,7 @@ class TFCommonTestCases:
hidden_states = outputs_dict[0] hidden_states = outputs_dict[0]
# Add a dense layer on top to test intetgration with other keras modules # Add a dense layer on top to test intetgration with other keras modules
outputs = tf.keras.layers.Dense(2, activation='softmax', name='outputs')(hidden_states) outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)
# Compile extended model # Compile extended model
extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs]) extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
...@@ -202,7 +217,9 @@ class TFCommonTestCases: ...@@ -202,7 +217,9 @@ class TFCommonTestCases:
outputs_dict = model(inputs_dict) outputs_dict = model(inputs_dict)
inputs_keywords = copy.deepcopy(inputs_dict) inputs_keywords = copy.deepcopy(inputs_dict)
input_ids = inputs_keywords.pop('input_ids' if not self.is_encoder_decoder else 'decoder_input_ids', None) input_ids = inputs_keywords.pop(
"input_ids" if not self.is_encoder_decoder else "decoder_input_ids", None
)
outputs_keywords = model(input_ids, **inputs_keywords) outputs_keywords = model(input_ids, **inputs_keywords)
output_dict = outputs_dict[0].numpy() output_dict = outputs_dict[0].numpy()
...@@ -213,10 +230,22 @@ class TFCommonTestCases: ...@@ -213,10 +230,22 @@ class TFCommonTestCases:
def test_attention_outputs(self): def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
decoder_seq_length = self.model_tester.decoder_seq_length if hasattr(self.model_tester, 'decoder_seq_length') else self.model_tester.seq_length decoder_seq_length = (
encoder_seq_length = self.model_tester.encoder_seq_length if hasattr(self.model_tester, 'encoder_seq_length') else self.model_tester.seq_length self.model_tester.decoder_seq_length
decoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else decoder_seq_length if hasattr(self.model_tester, "decoder_seq_length")
encoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else encoder_seq_length else self.model_tester.seq_length
)
encoder_seq_length = (
self.model_tester.encoder_seq_length
if hasattr(self.model_tester, "encoder_seq_length")
else self.model_tester.seq_length
)
decoder_key_length = (
self.model_tester.key_length if hasattr(self.model_tester, "key_length") else decoder_seq_length
)
encoder_key_length = (
self.model_tester.key_length if hasattr(self.model_tester, "key_length") else encoder_seq_length
)
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config.output_attentions = True config.output_attentions = True
...@@ -229,22 +258,20 @@ class TFCommonTestCases: ...@@ -229,22 +258,20 @@ class TFCommonTestCases:
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
self.assertListEqual( self.assertListEqual(
list(attentions[0].shape[-3:]), list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
encoder_seq_length, )
encoder_key_length])
out_len = len(outputs) out_len = len(outputs)
if self.is_encoder_decoder: if self.is_encoder_decoder:
self.assertEqual(out_len % 2, 0) self.assertEqual(out_len % 2, 0)
decoder_attentions = outputs[(out_len // 2)-1] decoder_attentions = outputs[(out_len // 2) - 1]
self.assertEqual(model.config.output_attentions, True) self.assertEqual(model.config.output_attentions, True)
self.assertEqual(model.config.output_hidden_states, False) self.assertEqual(model.config.output_hidden_states, False)
self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers) self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual( self.assertListEqual(
list(decoder_attentions[0].shape[-3:]), list(decoder_attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
decoder_seq_length, )
decoder_key_length])
# Check attention is always last and order is fine # Check attention is always last and order is fine
config.output_attentions = True config.output_attentions = True
...@@ -259,9 +286,8 @@ class TFCommonTestCases: ...@@ -259,9 +286,8 @@ class TFCommonTestCases:
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
self.assertListEqual( self.assertListEqual(
list(attentions[0].shape[-3:]), list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
encoder_seq_length, )
encoder_key_length])
def test_hidden_states_output(self): def test_hidden_states_output(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -276,8 +302,8 @@ class TFCommonTestCases: ...@@ -276,8 +302,8 @@ class TFCommonTestCases:
self.assertEqual(model.config.output_hidden_states, True) self.assertEqual(model.config.output_hidden_states, True)
self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1) self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
self.assertListEqual( self.assertListEqual(
list(hidden_states[0].shape[-2:]), list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size]
[self.model_tester.seq_length, self.model_tester.hidden_size]) )
def test_model_common_attributes(self): def test_model_common_attributes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -357,9 +383,7 @@ def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None): ...@@ -357,9 +383,7 @@ def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
for _ in range(total_dims): for _ in range(total_dims):
values.append(rng.randint(0, vocab_size - 1)) values.append(rng.randint(0, vocab_size - 1))
output = tf.constant(values, output = tf.constant(values, shape=shape, dtype=dtype if dtype is not None else tf.int32)
shape=shape,
dtype=dtype if dtype is not None else tf.int32)
return output return output
......
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import unittest import unittest
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,8 +27,7 @@ from transformers import CTRLConfig, is_tf_available ...@@ -27,8 +27,7 @@ from transformers import CTRLConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_ctrl import (TFCTRLModel, TFCTRLLMHeadModel, from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP)
@require_tf @require_tf
...@@ -37,32 +36,32 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -37,32 +36,32 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else () all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()
class TFCTRLModelTester(object): class TFCTRLModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -127,13 +126,21 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -127,13 +126,21 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels return (
config,
input_ids,
input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
)
def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFCTRLModel(config=config) model = TFCTRLModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output = model(inputs)[0] sequence_output = model(inputs)[0]
inputs = [input_ids, None, input_mask] # None is the input for 'past' inputs = [input_ids, None, input_mask] # None is the input for 'past'
...@@ -145,30 +152,36 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -145,30 +152,36 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFCTRLLMHeadModel(config=config) model = TFCTRLLMHeadModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
prediction_scores = model(inputs)[0] prediction_scores = model(inputs)[0]
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, head_mask, token_type_ids, (
mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs config,
input_ids,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -192,6 +205,6 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -192,6 +205,6 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
model = TFCTRLModel.from_pretrained(model_name, cache_dir=CACHE_DIR) model = TFCTRLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -18,7 +18,7 @@ from __future__ import print_function ...@@ -18,7 +18,7 @@ from __future__ import print_function
import unittest import unittest
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -26,48 +26,58 @@ from transformers import DistilBertConfig, is_tf_available ...@@ -26,48 +26,58 @@ from transformers import DistilBertConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_distilbert import (TFDistilBertModel, from transformers.modeling_tf_distilbert import (
TFDistilBertForMaskedLM, TFDistilBertModel,
TFDistilBertForQuestionAnswering, TFDistilBertForMaskedLM,
TFDistilBertForSequenceClassification) TFDistilBertForQuestionAnswering,
TFDistilBertForSequenceClassification,
)
@require_tf @require_tf
class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFDistilBertModel, TFDistilBertForMaskedLM, TFDistilBertForQuestionAnswering, all_model_classes = (
TFDistilBertForSequenceClassification) if is_tf_available() else None (
TFDistilBertModel,
TFDistilBertForMaskedLM,
TFDistilBertForQuestionAnswering,
TFDistilBertForSequenceClassification,
)
if is_tf_available()
else None
)
test_pruning = True test_pruning = True
test_torchscript = True test_torchscript = True
test_resize_embeddings = True test_resize_embeddings = True
test_head_masking = True test_head_masking = True
class TFDistilBertModelTester(object): class TFDistilBertModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_token_type_ids=False, use_token_type_ids=False,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -116,14 +126,16 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -116,14 +126,16 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
dropout=self.hidden_dropout_prob, dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob, attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range) initializer_range=self.initializer_range,
)
return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_and_check_distilbert_model(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_model(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFDistilBertModel(config=config) model = TFDistilBertModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask}
'attention_mask': input_mask}
outputs = model(inputs) outputs = model(inputs)
sequence_output = outputs[0] sequence_output = outputs[0]
...@@ -136,54 +148,51 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -136,54 +148,51 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_distilbert_for_masked_lm(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_masked_lm(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFDistilBertForMaskedLM(config=config) model = TFDistilBertForMaskedLM(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask}
'attention_mask': input_mask}
(prediction_scores,) = model(inputs) (prediction_scores,) = model(inputs)
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_distilbert_for_question_answering(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_question_answering(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFDistilBertForQuestionAnswering(config=config) model = TFDistilBertForQuestionAnswering(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask}
'attention_mask': input_mask}
start_logits, end_logits = model(inputs) start_logits, end_logits = model(inputs)
result = { result = {
"start_logits": start_logits.numpy(), "start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(), "end_logits": end_logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
list(result["start_logits"].shape), self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
[self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(result["end_logits"].shape),
[self.batch_size, self.seq_length])
def create_and_check_distilbert_for_sequence_classification(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_sequence_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = TFDistilBertForSequenceClassification(config) model = TFDistilBertForSequenceClassification(config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask}
'attention_mask': input_mask}
(logits,) = model(inputs) (logits,) = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
list(result["logits"].shape),
[self.batch_size, self.num_labels])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids, 'attention_mask': input_mask} inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -215,5 +224,6 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -215,5 +224,6 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
# model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR) # model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# self.assertIsNotNone(model) # self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import unittest import unittest
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,45 +27,47 @@ from transformers import GPT2Config, is_tf_available ...@@ -27,45 +27,47 @@ from transformers import GPT2Config, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_gpt2 import (TFGPT2Model, TFGPT2LMHeadModel, from transformers.modeling_tf_gpt2 import (
TFGPT2DoubleHeadsModel, TFGPT2Model,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP) TFGPT2LMHeadModel,
TFGPT2DoubleHeadsModel,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
TFGPT2DoubleHeadsModel) if is_tf_available() else ()
# all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel) if is_tf_available() else () # all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel) if is_tf_available() else ()
class TFGPT2ModelTester(object): class TFGPT2ModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -130,13 +132,21 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -130,13 +132,21 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels return (
config,
input_ids,
input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
)
def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFGPT2Model(config=config) model = TFGPT2Model(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output = model(inputs)[0] sequence_output = model(inputs)[0]
inputs = [input_ids, None, input_mask] # None is the input for 'past' inputs = [input_ids, None, input_mask] # None is the input for 'past'
...@@ -148,54 +158,58 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -148,54 +158,58 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_gpt2_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_gpt2_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFGPT2LMHeadModel(config=config) model = TFGPT2LMHeadModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
prediction_scores = model(inputs)[0] prediction_scores = model(inputs)[0]
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_gpt2_double_head(self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args): def create_and_check_gpt2_double_head(
self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
):
model = TFGPT2DoubleHeadsModel(config=config) model = TFGPT2DoubleHeadsModel(config=config)
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
inputs = {'input_ids': multiple_choice_inputs_ids, inputs = {
'mc_token_ids': mc_token_ids, "input_ids": multiple_choice_inputs_ids,
'attention_mask': multiple_choice_input_mask, "mc_token_ids": mc_token_ids,
'token_type_ids': multiple_choice_token_type_ids} "attention_mask": multiple_choice_input_mask,
lm_logits, mc_logits = model(inputs)[:2] "token_type_ids": multiple_choice_token_type_ids,
result = {
"lm_logits": lm_logits.numpy(),
"mc_logits": mc_logits.numpy()
} }
lm_logits, mc_logits = model(inputs)[:2]
result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["lm_logits"].shape), list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
[self.batch_size, self.num_choices, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices])
list(result["mc_logits"].shape),
[self.batch_size, self.num_choices])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, head_mask, token_type_ids, (
mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs config,
input_ids,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -223,6 +237,6 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -223,6 +237,6 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
model = TFGPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR) model = TFGPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import unittest import unittest
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,44 +27,48 @@ from transformers import OpenAIGPTConfig, is_tf_available ...@@ -27,44 +27,48 @@ from transformers import OpenAIGPTConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_openai import (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, from transformers.modeling_tf_openai import (
TFOpenAIGPTDoubleHeadsModel, TFOpenAIGPTModel,
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP) TFOpenAIGPTLMHeadModel,
TFOpenAIGPTDoubleHeadsModel,
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, all_model_classes = (
TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else () (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
)
class TFOpenAIGPTModelTester(object): class TFOpenAIGPTModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -129,13 +133,21 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -129,13 +133,21 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels return (
config,
input_ids,
input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
)
def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFOpenAIGPTModel(config=config) model = TFOpenAIGPTModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output = model(inputs)[0] sequence_output = model(inputs)[0]
inputs = [input_ids, input_mask] inputs = [input_ids, input_mask]
...@@ -147,54 +159,58 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -147,54 +159,58 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFOpenAIGPTLMHeadModel(config=config) model = TFOpenAIGPTLMHeadModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
prediction_scores = model(inputs)[0] prediction_scores = model(inputs)[0]
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_openai_gpt_double_head(self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args): def create_and_check_openai_gpt_double_head(
self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
):
model = TFOpenAIGPTDoubleHeadsModel(config=config) model = TFOpenAIGPTDoubleHeadsModel(config=config)
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
inputs = {'input_ids': multiple_choice_inputs_ids, inputs = {
'mc_token_ids': mc_token_ids, "input_ids": multiple_choice_inputs_ids,
'attention_mask': multiple_choice_input_mask, "mc_token_ids": mc_token_ids,
'token_type_ids': multiple_choice_token_type_ids} "attention_mask": multiple_choice_input_mask,
lm_logits, mc_logits = model(inputs)[:2] "token_type_ids": multiple_choice_token_type_ids,
result = {
"lm_logits": lm_logits.numpy(),
"mc_logits": mc_logits.numpy()
} }
lm_logits, mc_logits = model(inputs)[:2]
result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["lm_logits"].shape), list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
[self.batch_size, self.num_choices, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices])
list(result["mc_logits"].shape),
[self.batch_size, self.num_choices])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, head_mask, token_type_ids, (
mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs config,
input_ids,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -222,6 +238,6 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -222,6 +238,6 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR) model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -18,7 +18,7 @@ from __future__ import print_function ...@@ -18,7 +18,7 @@ from __future__ import print_function
import unittest import unittest
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,44 +27,48 @@ from transformers import RobertaConfig, is_tf_available ...@@ -27,44 +27,48 @@ from transformers import RobertaConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
import numpy import numpy
from transformers.modeling_tf_roberta import (TFRobertaModel, TFRobertaForMaskedLM, from transformers.modeling_tf_roberta import (
TFRobertaForSequenceClassification, TFRobertaModel,
TFRobertaForTokenClassification, TFRobertaForMaskedLM,
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP) TFRobertaForSequenceClassification,
TFRobertaForTokenClassification,
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFRobertaModel,TFRobertaForMaskedLM, all_model_classes = (
TFRobertaForSequenceClassification) if is_tf_available() else () (TFRobertaModel, TFRobertaForMaskedLM, TFRobertaForSequenceClassification) if is_tf_available() else ()
)
class TFRobertaModelTester(object): class TFRobertaModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_token_type_ids=True, use_token_type_ids=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -118,16 +122,16 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -118,16 +122,16 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
attention_probs_dropout_prob=self.attention_probs_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range) initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, def create_and_check_roberta_model(
token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFRobertaModel(config=config) model = TFRobertaModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output = model(inputs)[0] sequence_output = model(inputs)[0]
inputs = [input_ids, input_mask] inputs = [input_ids, input_mask]
...@@ -139,39 +143,47 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -139,39 +143,47 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_roberta_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, def create_and_check_roberta_for_masked_lm(
token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFRobertaForMaskedLM(config=config) model = TFRobertaForMaskedLM(config=config)
prediction_scores = model([input_ids, input_mask, token_type_ids])[0] prediction_scores = model([input_ids, input_mask, token_type_ids])[0]
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_roberta_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_roberta_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = TFRobertaForTokenClassification(config=config) model = TFRobertaForTokenClassification(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (logits,) = model(inputs)
'token_type_ids': token_type_ids}
logits, = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["logits"].shape), list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
[self.batch_size, self.seq_length, self.num_labels]) )
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask, (
sequence_labels, token_labels, choice_labels) = config_and_inputs config,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -196,61 +208,43 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -196,61 +208,43 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
self.assertIsNotNone(model) self.assertIsNotNone(model)
class TFRobertaModelIntegrationTest(unittest.TestCase): class TFRobertaModelIntegrationTest(unittest.TestCase):
@slow @slow
def test_inference_masked_lm(self): def test_inference_masked_lm(self):
model = TFRobertaForMaskedLM.from_pretrained('roberta-base') model = TFRobertaForMaskedLM.from_pretrained("roberta-base")
input_ids = tf.constant([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
expected_shape = [1, 11, 50265] expected_shape = [1, 11, 50265]
self.assertEqual( self.assertEqual(list(output.numpy().shape), expected_shape)
list(output.numpy().shape),
expected_shape
)
# compare the actual values for a slice. # compare the actual values for a slice.
expected_slice = tf.constant( expected_slice = tf.constant(
[[[33.8843, -4.3107, 22.7779], [[[33.8843, -4.3107, 22.7779], [4.6533, -2.8099, 13.6252], [1.8222, -3.6898, 8.8600]]]
[ 4.6533, -2.8099, 13.6252],
[ 1.8222, -3.6898, 8.8600]]]
)
self.assertTrue(
numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3)
) )
self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3))
@slow @slow
def test_inference_no_head(self): def test_inference_no_head(self):
model = TFRobertaModel.from_pretrained('roberta-base') model = TFRobertaModel.from_pretrained("roberta-base")
input_ids = tf.constant([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
# compare the actual values for a slice. # compare the actual values for a slice.
expected_slice = tf.constant( expected_slice = tf.constant(
[[[-0.0231, 0.0782, 0.0074], [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
[-0.1854, 0.0539, -0.0174],
[ 0.0548, 0.0799, 0.1687]]]
)
self.assertTrue(
numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3)
) )
self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3))
@slow @slow
def test_inference_classification_head(self): def test_inference_classification_head(self):
model = TFRobertaForSequenceClassification.from_pretrained('roberta-large-mnli') model = TFRobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
input_ids = tf.constant([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
expected_shape = [1, 3] expected_shape = [1, 3]
self.assertEqual( self.assertEqual(list(output.numpy().shape), expected_shape)
list(output.numpy().shape), expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
expected_shape self.assertTrue(numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-3))
)
expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
self.assertTrue(
numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-3)
)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import unittest import unittest
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,8 +27,7 @@ from transformers import T5Config, is_tf_available ...@@ -27,8 +27,7 @@ from transformers import T5Config, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_t5 import (TFT5Model, TFT5WithLMHeadModel, from transformers.modeling_tf_t5 import TFT5Model, TFT5WithLMHeadModel, TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP
TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP)
@require_tf @require_tf
...@@ -38,25 +37,25 @@ class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -38,25 +37,25 @@ class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFT5Model, TFT5WithLMHeadModel) if is_tf_available() else () all_model_classes = (TFT5Model, TFT5WithLMHeadModel) if is_tf_available() else ()
class TFT5ModelTester(object): class TFT5ModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
n_positions=14, n_positions=14,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
d_ff=37, d_ff=37,
relative_attention_num_buckets=8, relative_attention_num_buckets=8,
dropout_rate=0.1, dropout_rate=0.1,
initializer_factor=0.002, initializer_factor=0.002,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -95,53 +94,58 @@ class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -95,53 +94,58 @@ class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):
num_heads=self.num_attention_heads, num_heads=self.num_attention_heads,
relative_attention_num_buckets=self.relative_attention_num_buckets, relative_attention_num_buckets=self.relative_attention_num_buckets,
dropout_rate=self.dropout_rate, dropout_rate=self.dropout_rate,
initializer_factor=self.initializer_factor) initializer_factor=self.initializer_factor,
)
return (config, input_ids, input_mask, token_labels) return (config, input_ids, input_mask, token_labels)
def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels): def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
model = TFT5Model(config=config) model = TFT5Model(config=config)
inputs = {'encoder_input_ids': input_ids, inputs = {
'decoder_input_ids': input_ids, "encoder_input_ids": input_ids,
'decoder_attention_mask': input_mask} "decoder_input_ids": input_ids,
"decoder_attention_mask": input_mask,
}
encoder_output, decoder_output = model(inputs) encoder_output, decoder_output = model(inputs)
encoder_output, decoder_output = model(input_ids, encoder_output, decoder_output = model(
decoder_attention_mask=input_mask, input_ids, decoder_attention_mask=input_mask, encoder_input_ids=input_ids
encoder_input_ids=input_ids) )
result = { result = {
"encoder_output": encoder_output.numpy(), "encoder_output": encoder_output.numpy(),
"decoder_output": decoder_output.numpy(), "decoder_output": decoder_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["encoder_output"].shape), list(result["encoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["decoder_output"].shape), list(result["decoder_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels): def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
model = TFT5WithLMHeadModel(config=config) model = TFT5WithLMHeadModel(config=config)
inputs = {'encoder_input_ids': input_ids, inputs = {
'decoder_input_ids': input_ids, "encoder_input_ids": input_ids,
'decoder_attention_mask': input_mask} "decoder_input_ids": input_ids,
"decoder_attention_mask": input_mask,
}
prediction_scores, decoder_output = model(inputs) prediction_scores, decoder_output = model(inputs)
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, token_labels) = config_and_inputs (config, input_ids, input_mask, token_labels) = config_and_inputs
inputs_dict = {'encoder_input_ids': input_ids, inputs_dict = {
'decoder_input_ids': input_ids, "encoder_input_ids": input_ids,
'decoder_attention_mask': input_mask} "decoder_input_ids": input_ids,
"decoder_attention_mask": input_mask,
}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -161,9 +165,10 @@ class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -161,9 +165,10 @@ class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
for model_name in ['t5-small']: for model_name in ["t5-small"]:
model = TFT5Model.from_pretrained(model_name, cache_dir=CACHE_DIR) model = TFT5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -19,7 +19,7 @@ from __future__ import print_function ...@@ -19,7 +19,7 @@ from __future__ import print_function
import unittest import unittest
import random import random
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -27,9 +27,11 @@ from transformers import TransfoXLConfig, is_tf_available ...@@ -27,9 +27,11 @@ from transformers import TransfoXLConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_transfo_xl import (TFTransfoXLModel, from transformers.modeling_tf_transfo_xl import (
TFTransfoXLLMHeadModel, TFTransfoXLModel,
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP) TFTransfoXLLMHeadModel,
TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
...@@ -41,27 +43,27 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -41,27 +43,27 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
test_resize_embeddings = False test_resize_embeddings = False
class TFTransfoXLModelTester(object): class TFTransfoXLModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
mem_len=30, mem_len=30,
clamp_len=15, clamp_len=15,
is_training=True, is_training=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
cutoffs=[10, 50, 80], cutoffs=[10, 50, 80],
hidden_size=32, hidden_size=32,
d_embed=32, d_embed=32,
num_attention_heads=4, num_attention_heads=4,
d_head=8, d_head=8,
d_inner=128, d_inner=128,
div_val=2, div_val=2,
num_hidden_layers=5, num_hidden_layers=5,
scope=None, scope=None,
seed=1, seed=1,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -101,7 +103,8 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -101,7 +103,8 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
d_head=self.d_head, d_head=self.d_head,
d_inner=self.d_inner, d_inner=self.d_inner,
div_val=self.div_val, div_val=self.div_val,
n_layer=self.num_hidden_layers) n_layer=self.num_hidden_layers,
)
return (config, input_ids_1, input_ids_2, lm_labels) return (config, input_ids_1, input_ids_2, lm_labels)
...@@ -114,8 +117,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -114,8 +117,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
hidden_states_1, mems_1 = model(input_ids_1) hidden_states_1, mems_1 = model(input_ids_1)
inputs = {'input_ids': input_ids_2, inputs = {"input_ids": input_ids_2, "mems": mems_1}
'mems': mems_1}
hidden_states_2, mems_2 = model(inputs) hidden_states_2, mems_2 = model(inputs)
...@@ -127,33 +129,31 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -127,33 +129,31 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["hidden_states_1"].shape), list(result["hidden_states_1"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["hidden_states_2"].shape), list(result["hidden_states_2"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]), list(list(mem.shape) for mem in result["mems_1"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_2"]), list(list(mem.shape) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels): def create_and_check_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2, lm_labels):
model = TFTransfoXLLMHeadModel(config) model = TFTransfoXLLMHeadModel(config)
lm_logits_1, mems_1 = model(input_ids_1) lm_logits_1, mems_1 = model(input_ids_1)
inputs = {'input_ids': input_ids_1, inputs = {"input_ids": input_ids_1, "labels": lm_labels}
'labels': lm_labels}
_, mems_1 = model(inputs) _, mems_1 = model(inputs)
lm_logits_2, mems_2 = model([input_ids_2, mems_1]) lm_logits_2, mems_2 = model([input_ids_2, mems_1])
inputs = {'input_ids': input_ids_1, inputs = {"input_ids": input_ids_1, "mems": mems_1, "labels": lm_labels}
'mems': mems_1,
'labels': lm_labels}
_, mems_2 = model(inputs) _, mems_2 = model(inputs)
...@@ -165,26 +165,27 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -165,26 +165,27 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["lm_logits_1"].shape), list(result["lm_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]), list(list(mem.shape) for mem in result["mems_1"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["lm_logits_2"].shape), list(result["lm_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_2"]), list(list(mem.shape) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids_1} inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
self.model_tester = TFTransfoXLModelTest.TFTransfoXLModelTester(self) self.model_tester = TFTransfoXLModelTest.TFTransfoXLModelTester(self)
self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37) self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
......
...@@ -22,13 +22,16 @@ from transformers import is_tf_available ...@@ -22,13 +22,16 @@ from transformers import is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers import (XLMConfig, TFXLMModel, from transformers import (
TFXLMWithLMHeadModel, XLMConfig,
TFXLMForSequenceClassification, TFXLMModel,
TFXLMForQuestionAnsweringSimple, TFXLMWithLMHeadModel,
TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP) TFXLMForSequenceClassification,
TFXLMForQuestionAnsweringSimple,
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -36,43 +39,44 @@ from .utils import CACHE_DIR, require_tf, slow ...@@ -36,43 +39,44 @@ from .utils import CACHE_DIR, require_tf, slow
@require_tf @require_tf
class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFXLMModel, TFXLMWithLMHeadModel, all_model_classes = (
TFXLMForSequenceClassification, (TFXLMModel, TFXLMWithLMHeadModel, TFXLMForSequenceClassification, TFXLMForQuestionAnsweringSimple)
TFXLMForQuestionAnsweringSimple) if is_tf_available() else () if is_tf_available()
else ()
)
class TFXLMModelTester(object): class TFXLMModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_lengths=True, use_input_lengths=True,
use_token_type_ids=True, use_token_type_ids=True,
use_labels=True, use_labels=True,
gelu_activation=True, gelu_activation=True,
sinusoidal_embeddings=False, sinusoidal_embeddings=False,
causal=False, causal=False,
asm=False, asm=False,
n_langs=2, n_langs=2,
vocab_size=99, vocab_size=99,
n_special=0, n_special=0,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
summary_type="last", summary_type="last",
use_proj=True, use_proj=True,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -109,7 +113,9 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -109,7 +113,9 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
input_lengths = None input_lengths = None
if self.use_input_lengths: if self.use_input_lengths:
input_lengths = ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2 # small variation of seq_length input_lengths = (
ids_tensor([self.batch_size], vocab_size=2) + self.seq_length - 2
) # small variation of seq_length
token_type_ids = None token_type_ids = None
if self.use_token_type_ids: if self.use_token_type_ids:
...@@ -124,30 +130,48 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -124,30 +130,48 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
config = XLMConfig( config = XLMConfig(
vocab_size=self.vocab_size, vocab_size=self.vocab_size,
n_special=self.n_special, n_special=self.n_special,
emb_dim=self.hidden_size, emb_dim=self.hidden_size,
n_layers=self.num_hidden_layers, n_layers=self.num_hidden_layers,
n_heads=self.num_attention_heads, n_heads=self.num_attention_heads,
dropout=self.hidden_dropout_prob, dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob, attention_dropout=self.attention_probs_dropout_prob,
gelu_activation=self.gelu_activation, gelu_activation=self.gelu_activation,
sinusoidal_embeddings=self.sinusoidal_embeddings, sinusoidal_embeddings=self.sinusoidal_embeddings,
asm=self.asm, asm=self.asm,
causal=self.causal, causal=self.causal,
n_langs=self.n_langs, n_langs=self.n_langs,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
summary_type=self.summary_type, summary_type=self.summary_type,
use_proj=self.use_proj) use_proj=self.use_proj,
)
return config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask
return (
def create_and_check_xlm_model(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask): config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
)
def create_and_check_xlm_model(
self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMModel(config=config) model = TFXLMModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
'lengths': input_lengths,
'langs': token_type_ids}
outputs = model(inputs) outputs = model(inputs)
inputs = [input_ids, input_mask] inputs = [input_ids, input_mask]
...@@ -157,16 +181,23 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -157,16 +181,23 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_xlm_lm_head(
def create_and_check_xlm_lm_head(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask): self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMWithLMHeadModel(config) model = TFXLMWithLMHeadModel(config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "lengths": input_lengths, "langs": token_type_ids}
'lengths': input_lengths,
'langs': token_type_ids}
outputs = model(inputs) outputs = model(inputs)
logits = outputs[0] logits = outputs[0]
...@@ -176,15 +207,23 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -176,15 +207,23 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["logits"].shape), list(result["logits"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_xlm_qa(
def create_and_check_xlm_qa(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask): self,
config,
input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMForQuestionAnsweringSimple(config) model = TFXLMForQuestionAnsweringSimple(config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "lengths": input_lengths}
'lengths': input_lengths}
outputs = model(inputs) outputs = model(inputs)
start_logits, end_logits = model(inputs) start_logits, end_logits = model(inputs)
...@@ -194,19 +233,23 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -194,19 +233,23 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
"end_logits": end_logits.numpy(), "end_logits": end_logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
list(result["start_logits"].shape), self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
[self.batch_size, self.seq_length])
self.parent.assertListEqual( def create_and_check_xlm_sequence_classif(
list(result["end_logits"].shape), self,
[self.batch_size, self.seq_length]) config,
input_ids,
token_type_ids,
def create_and_check_xlm_sequence_classif(self, config, input_ids, token_type_ids, input_lengths, sequence_labels, token_labels, is_impossible_labels, input_mask): input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
):
model = TFXLMForSequenceClassification(config) model = TFXLMForSequenceClassification(config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "lengths": input_lengths}
'lengths': input_lengths}
(logits,) = model(inputs) (logits,) = model(inputs)
...@@ -214,16 +257,26 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -214,16 +257,26 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
list(result["logits"].shape),
[self.batch_size, self.type_sequence_label_size])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_lengths, (
sequence_labels, token_labels, is_impossible_labels, input_mask) = config_and_inputs config,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'langs': token_type_ids, 'lengths': input_lengths} input_ids,
token_type_ids,
input_lengths,
sequence_labels,
token_labels,
is_impossible_labels,
input_mask,
) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
"token_type_ids": token_type_ids,
"langs": token_type_ids,
"lengths": input_lengths,
}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
......
...@@ -26,13 +26,16 @@ from transformers import XLNetConfig, is_tf_available ...@@ -26,13 +26,16 @@ from transformers import XLNetConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_xlnet import (TFXLNetModel, TFXLNetLMHeadModel, from transformers.modeling_tf_xlnet import (
TFXLNetForSequenceClassification, TFXLNetModel,
TFXLNetForTokenClassification, TFXLNetLMHeadModel,
TFXLNetForQuestionAnsweringSimple, TFXLNetForSequenceClassification,
TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP) TFXLNetForTokenClassification,
TFXLNetForQuestionAnsweringSimple,
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP,
)
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
...@@ -40,37 +43,44 @@ from .utils import CACHE_DIR, require_tf, slow ...@@ -40,37 +43,44 @@ from .utils import CACHE_DIR, require_tf, slow
@require_tf @require_tf
class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes=(TFXLNetModel, TFXLNetLMHeadModel, all_model_classes = (
TFXLNetForSequenceClassification, (
TFXLNetForTokenClassification, TFXLNetModel,
TFXLNetForQuestionAnsweringSimple) if is_tf_available() else () TFXLNetLMHeadModel,
TFXLNetForSequenceClassification,
TFXLNetForTokenClassification,
TFXLNetForQuestionAnsweringSimple,
)
if is_tf_available()
else ()
)
test_pruning = False test_pruning = False
class TFXLNetModelTester(object): class TFXLNetModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
mem_len=10, mem_len=10,
clamp_len=-1, clamp_len=-1,
reuse_len=15, reuse_len=15,
is_training=True, is_training=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
cutoffs=[10, 50, 80], cutoffs=[10, 50, 80],
hidden_size=32, hidden_size=32,
num_attention_heads=4, num_attention_heads=4,
d_inner=128, d_inner=128,
num_hidden_layers=5, num_hidden_layers=5,
type_sequence_label_size=2, type_sequence_label_size=2,
untie_r=True, untie_r=True,
bi_data=False, bi_data=False,
same_length=False, same_length=False,
initializer_range=0.05, initializer_range=0.05,
seed=1, seed=1,
type_vocab_size=2, type_vocab_size=2,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -131,22 +141,44 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -131,22 +141,44 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
reuse_len=self.reuse_len, reuse_len=self.reuse_len,
bi_data=self.bi_data, bi_data=self.bi_data,
initializer_range=self.initializer_range, initializer_range=self.initializer_range,
num_labels=self.type_sequence_label_size) num_labels=self.type_sequence_label_size,
)
return (config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels) return (
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
)
def set_seed(self): def set_seed(self):
random.seed(self.seed) random.seed(self.seed)
tf.random.set_seed(self.seed) tf.random.set_seed(self.seed)
def create_and_check_xlnet_base_model(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask, def create_and_check_xlnet_base_model(
target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels): self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetModel(config) model = TFXLNetModel(config)
inputs = {'input_ids': input_ids_1, inputs = {"input_ids": input_ids_1, "input_mask": input_mask, "token_type_ids": segment_ids}
'input_mask': input_mask,
'token_type_ids': segment_ids}
_, _ = model(inputs) _, _ = model(inputs)
...@@ -165,30 +197,38 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -165,30 +197,38 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
self.parent.assertEqual(len(no_mems_outputs), 1) self.parent.assertEqual(len(no_mems_outputs), 1)
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["outputs"].shape), list(result["outputs"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]), list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_lm_head(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels): def create_and_check_xlnet_lm_head(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetLMHeadModel(config) model = TFXLNetLMHeadModel(config)
inputs_1 = {'input_ids': input_ids_1, inputs_1 = {"input_ids": input_ids_1, "token_type_ids": segment_ids}
'token_type_ids': segment_ids}
all_logits_1, mems_1 = model(inputs_1) all_logits_1, mems_1 = model(inputs_1)
inputs_2 = {'input_ids': input_ids_2, inputs_2 = {"input_ids": input_ids_2, "mems": mems_1, "token_type_ids": segment_ids}
'mems': mems_1,
'token_type_ids': segment_ids}
all_logits_2, mems_2 = model(inputs_2) all_logits_2, mems_2 = model(inputs_2)
inputs_3 = {'input_ids': input_ids_q, inputs_3 = {"input_ids": input_ids_q, "perm_mask": perm_mask, "target_mapping": target_mapping}
'perm_mask': perm_mask,
'target_mapping': target_mapping}
logits, _ = model(inputs_3) logits, _ = model(inputs_3)
...@@ -200,26 +240,38 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -200,26 +240,38 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["all_logits_1"].shape), list(result["all_logits_1"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]), list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["all_logits_2"].shape), list(result["all_logits_2"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_2"]), list(list(mem.shape) for mem in result["mems_2"]),
[[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.mem_len, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_qa(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels): def create_and_check_xlnet_qa(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetForQuestionAnsweringSimple(config) model = TFXLNetForQuestionAnsweringSimple(config)
inputs = {'input_ids': input_ids_1, inputs = {"input_ids": input_ids_1, "attention_mask": input_mask, "token_type_ids": segment_ids}
'attention_mask': input_mask,
'token_type_ids': segment_ids}
start_logits, end_logits, mems = model(inputs) start_logits, end_logits, mems = model(inputs)
result = { result = {
...@@ -228,18 +280,27 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -228,18 +280,27 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
"mems": [m.numpy() for m in mems], "mems": [m.numpy() for m in mems],
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
list(result["start_logits"].shape), self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
[self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(result["end_logits"].shape),
[self.batch_size, self.seq_length])
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems"]), list(list(mem.shape) for mem in result["mems"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_sequence_classif(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels): def create_and_check_xlnet_sequence_classif(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
model = TFXLNetForSequenceClassification(config) model = TFXLNetForSequenceClassification(config)
logits, mems_1 = model(input_ids_1) logits, mems_1 = model(input_ids_1)
...@@ -249,42 +310,64 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -249,42 +310,64 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.type_sequence_label_size])
list(result["logits"].shape),
[self.batch_size, self.type_sequence_label_size])
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]), list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def create_and_check_xlnet_for_token_classification(self, config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask,
target_mapping, segment_ids, lm_labels, sequence_labels, is_impossible_labels): def create_and_check_xlnet_for_token_classification(
self,
config,
input_ids_1,
input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
):
config.num_labels = input_ids_1.shape[1] config.num_labels = input_ids_1.shape[1]
model = TFXLNetForTokenClassification(config) model = TFXLNetForTokenClassification(config)
inputs = {'input_ids': input_ids_1, inputs = {
'attention_mask': input_mask, "input_ids": input_ids_1,
# 'token_type_ids': token_type_ids "attention_mask": input_mask,
} # 'token_type_ids': token_type_ids
}
logits, mems_1 = model(inputs) logits, mems_1 = model(inputs)
result = { result = {
"mems_1": [mem.numpy() for mem in mems_1], "mems_1": [mem.numpy() for mem in mems_1],
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["logits"].shape), list(result["logits"].shape), [self.batch_size, self.seq_length, config.num_labels]
[self.batch_size, self.seq_length, config.num_labels]) )
self.parent.assertListEqual( self.parent.assertListEqual(
list(list(mem.shape) for mem in result["mems_1"]), list(list(mem.shape) for mem in result["mems_1"]),
[[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers) [[self.seq_length, self.batch_size, self.hidden_size]] * self.num_hidden_layers,
)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids_1, input_ids_2, input_ids_q, perm_mask, input_mask, (
target_mapping, segment_ids, lm_labels, config,
sequence_labels, is_impossible_labels) = config_and_inputs input_ids_1,
inputs_dict = {'input_ids': input_ids_1} input_ids_2,
input_ids_q,
perm_mask,
input_mask,
target_mapping,
segment_ids,
lm_labels,
sequence_labels,
is_impossible_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
self.model_tester = TFXLNetModelTest.TFXLNetModelTester(self) self.model_tester = TFXLNetModelTest.TFXLNetModelTester(self)
self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37) self.config_tester = ConfigTester(self, config_class=XLNetConfig, d_inner=37)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment