Unverified Commit c852036b authored by Amil Khare, committed by GitHub

[cleanup] Hoist ModelTester objects to top level (#4939)


Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
parent 0c55a384
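The change applied to every test file below is the same mechanical cleanup: each `*ModelTester` helper that used to be nested inside its `unittest.TestCase` is hoisted to module level, its long list of constructor keyword arguments (whose defaults were never overridden by any caller) is collapsed into hard-coded attribute assignments, and `setUp` instantiates the hoisted class directly instead of reaching through the test class. A minimal sketch of the before/after shape, using illustrative names rather than the exact repository code:

import unittest


# Before: the helper is nested inside the TestCase and exposes keyword
# arguments whose defaults are never overridden anywhere in the test suite.
class OldStyleModelTest(unittest.TestCase):
    class ModelTester(object):
        def __init__(self, parent, batch_size=13, seq_length=7):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length

    def setUp(self):
        self.model_tester = OldStyleModelTest.ModelTester(self)


# After: the helper lives at module level, the unused keyword arguments become
# plain attribute assignments, and setUp references the class directly.
class ModelTester:
    def __init__(self, parent):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7


class NewStyleModelTest(unittest.TestCase):
    def setUp(self):
        self.model_tester = ModelTester(self)

    def test_tester_is_configured(self):
        self.assertEqual(self.model_tester.batch_size, 13)


if __name__ == "__main__":
    unittest.main()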
@@ -37,75 +37,34 @@ if is_torch_available():
from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST
class AlbertModelTester:
@require_torch
class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
AlbertModel,
AlbertForPreTraining,
AlbertForMaskedLM,
AlbertForMultipleChoice,
AlbertForSequenceClassification,
AlbertForTokenClassification,
AlbertForQuestionAnswering,
)
if is_torch_available()
else ()
)
class AlbertModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
embedding_size=16,
hidden_size=36,
num_hidden_layers=6,
num_hidden_groups=6,
num_attention_heads=6,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.embedding_size = embedding_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.num_hidden_groups = num_hidden_groups
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_mask = True
self.use_token_type_ids = True
self.use_labels = True
self.vocab_size = 99
self.embedding_size = 16
self.hidden_size = 36
self.num_hidden_layers = 6
self.num_hidden_groups = 6
self.num_attention_heads = 6
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -253,16 +212,12 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
model = AlbertForTokenClassification(config=config)
model.to(torch_device)
model.eval()
loss, logits = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
)
loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
)
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
self.check_loss_output(result)
def create_and_check_albert_for_multiple_choice(
@@ -286,7 +241,6 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
"logits": logits,
}
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices])
self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
@@ -302,8 +256,26 @@ class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class AlbertModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
AlbertModel,
AlbertForPreTraining,
AlbertForMaskedLM,
AlbertForMultipleChoice,
AlbertForSequenceClassification,
AlbertForTokenClassification,
AlbertForQuestionAnswering,
)
if is_torch_available()
else ()
)
def setUp(self):
self.model_tester = AlbertModelTest.AlbertModelTester(self)
self.model_tester = AlbertModelTester(self)
self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)
def test_config(self):
...
@@ -27,66 +27,33 @@ if is_torch_available():
from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_LIST, CTRLLMHeadModel
class CTRLModelTester:
@require_torch
class CTRLModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (CTRLModel, CTRLLMHeadModel) if is_torch_available() else ()
all_generative_model_classes = (CTRLLMHeadModel,) if is_torch_available() else ()
test_pruning = True
test_torchscript = False
test_resize_embeddings = False
test_head_masking = False
class CTRLModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=14,
seq_length=7,
is_training=True,
use_token_type_ids=True,
use_input_mask=True,
use_labels=True,
use_mc_token_ids=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_token_type_ids = use_token_type_ids
self.use_input_mask = use_input_mask
self.use_labels = use_labels
self.use_mc_token_ids = use_mc_token_ids
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 14
self.seq_length = 7
self.is_training = True
self.use_token_type_ids = True
self.use_input_mask = True
self.use_labels = True
self.use_mc_token_ids = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -193,8 +160,19 @@ class CTRLModelTest(ModelTesterMixin, unittest.TestCase):
return config, inputs_dict
@require_torch
class CTRLModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (CTRLModel, CTRLLMHeadModel) if is_torch_available() else ()
all_generative_model_classes = (CTRLLMHeadModel,) if is_torch_available() else ()
test_pruning = True
test_torchscript = False
test_resize_embeddings = False
test_head_masking = False
def setUp(self):
self.model_tester = CTRLModelTest.CTRLModelTester(self)
self.model_tester = CTRLModelTester(self)
self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)
def test_config(self):
...
@@ -34,27 +34,6 @@ if is_torch_available():
DistilBertForSequenceClassification,
)
@require_torch
class DistilBertModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
DistilBertModel,
DistilBertForMaskedLM,
DistilBertForMultipleChoice,
DistilBertForQuestionAnswering,
DistilBertForSequenceClassification,
DistilBertForTokenClassification,
)
if is_torch_available()
else None
)
test_pruning = True
test_torchscript = True
test_resize_embeddings = True
test_head_masking = True
class DistilBertModelTester(object):
def __init__(
self,
@@ -245,8 +224,29 @@ class DistilBertModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class DistilBertModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
DistilBertModel,
DistilBertForMaskedLM,
DistilBertForMultipleChoice,
DistilBertForQuestionAnswering,
DistilBertForSequenceClassification,
DistilBertForTokenClassification,
)
if is_torch_available()
else None
)
test_pruning = True
test_torchscript = True
test_resize_embeddings = True
test_head_masking = True
def setUp(self):
self.model_tester = DistilBertModelTest.DistilBertModelTester(self)
self.model_tester = DistilBertModelTester(self)
self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37)
def test_config(self):
...
@@ -36,70 +36,32 @@ if is_torch_available():
from transformers.modeling_electra import ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST
class ElectraModelTester:
@require_torch
class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
ElectraModel,
ElectraForPreTraining,
ElectraForMaskedLM,
ElectraForTokenClassification,
ElectraForSequenceClassification,
ElectraForQuestionAnswering,
)
if is_torch_available()
else ()
)
class ElectraModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_mask = True
self.use_token_type_ids = True
self.use_labels = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -216,16 +178,12 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
model = ElectraForTokenClassification(config=config)
model.to(torch_device)
model.eval()
loss, logits = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
)
loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
)
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
self.check_loss_output(result)
def create_and_check_electra_for_pretraining(
@@ -323,8 +281,25 @@ class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class ElectraModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
ElectraModel,
ElectraForPreTraining,
ElectraForMaskedLM,
ElectraForTokenClassification,
ElectraForSequenceClassification,
ElectraForQuestionAnswering,
)
if is_torch_available()
else ()
)
def setUp(self):
self.model_tester = ElectraModelTest.ElectraModelTester(self)
self.model_tester = ElectraModelTester(self)
self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37)
def test_config(self):
...
@@ -35,82 +35,38 @@ if is_torch_available():
from transformers.modeling_flaubert import FLAUBERT_PRETRAINED_MODEL_ARCHIVE_LIST
class FlaubertModelTester(object):
@require_torch
class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
FlaubertModel,
FlaubertWithLMHeadModel,
FlaubertForQuestionAnswering,
FlaubertForQuestionAnsweringSimple,
FlaubertForSequenceClassification,
)
if is_torch_available()
else ()
)
class FlaubertModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_lengths=True,
use_token_type_ids=True,
use_labels=True,
gelu_activation=True,
sinusoidal_embeddings=False,
causal=False,
asm=False,
n_langs=2,
vocab_size=99,
n_special=0,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
summary_type="last",
use_proj=True,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_lengths = use_input_lengths
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.gelu_activation = gelu_activation
self.sinusoidal_embeddings = sinusoidal_embeddings
self.asm = asm
self.n_langs = n_langs
self.vocab_size = vocab_size
self.n_special = n_special
self.summary_type = summary_type
self.causal = causal
self.use_proj = use_proj
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.n_langs = n_langs
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.summary_type = summary_type
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_lengths = True
self.use_token_type_ids = True
self.use_labels = True
self.gelu_activation = True
self.sinusoidal_embeddings = False
self.causal = False
self.asm = False
self.n_langs = 2
self.vocab_size = 99
self.n_special = 0
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 12
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.summary_type = "last"
self.use_proj = None
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -215,9 +171,7 @@ class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
)
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.vocab_size])
def create_and_check_flaubert_simple_qa(
self,
@@ -310,8 +264,7 @@ class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(
list(result["end_top_index"].size()),
[self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(
list(result["end_top_index"].size()), [self.batch_size, model.config.start_n_top * model.config.end_n_top],
)
self.parent.assertListEqual(list(result["cls_logits"].size()), [self.batch_size])
@@ -339,9 +292,7 @@ class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
}
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size]
)
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.type_sequence_label_size])
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
@@ -358,8 +309,24 @@ class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "lengths": input_lengths}
return config, inputs_dict
@require_torch
class FlaubertModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
FlaubertModel,
FlaubertWithLMHeadModel,
FlaubertForQuestionAnswering,
FlaubertForQuestionAnsweringSimple,
FlaubertForSequenceClassification,
)
if is_torch_available()
else ()
)
def setUp(self):
self.model_tester = FlaubertModelTest.FlaubertModelTester(self)
self.model_tester = FlaubertModelTester(self)
self.config_tester = ConfigTester(self, config_class=FlaubertConfig, emb_dim=37)
def test_config(self):
...
@@ -34,15 +34,7 @@ if is_torch_available():
)
class GPT2ModelTester:
@require_torch
class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
all_generative_model_classes = (
(GPT2LMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
class GPT2ModelTester(object):
def __init__(
self,
parent,
@@ -70,28 +62,28 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_token_type_ids = use_token_type_ids
self.use_input_mask = use_input_mask
self.use_labels = use_labels
self.use_mc_token_ids = use_mc_token_ids
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 14
self.seq_length = 7
self.is_training = True
self.use_token_type_ids = True
self.use_input_mask = True
self.use_labels = True
self.use_mc_token_ids = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
self.bos_token_id = vocab_size - 1
self.eos_token_id = vocab_size - 1
@@ -277,8 +269,7 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
self.parent.assertListEqual(list(result["loss"].size()), [])
self.parent.assertListEqual(
list(result["lm_logits"].size()),
[self.batch_size, self.num_choices, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(
list(result["lm_logits"].size()), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size],
)
self.parent.assertListEqual(list(result["mc_logits"].size()), [self.batch_size, self.num_choices])
@@ -305,8 +296,17 @@ class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
return config, inputs_dict
@require_torch
class GPT2ModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
all_generative_model_classes = (
(GPT2LMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
def setUp(self):
self.model_tester = GPT2ModelTest.GPT2ModelTester(self)
self.model_tester = GPT2ModelTester(self)
self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)
def test_config(self):
...
@@ -36,56 +36,33 @@ if is_torch_available():
)
class LongformerModelTester(object):
class LongformerModelTester:
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
attention_window=4,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.attention_window = attention_window
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_mask = True
self.use_token_type_ids = True
self.use_labels = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
self.attention_window = 4
# `ModelTesterMixin.test_attention_outputs` is expecting attention tensors to be of size
# [num_attention_heads, encoder_seq_length, encoder_key_length], but LongformerSelfAttention
...
@@ -34,62 +34,31 @@ if is_torch_available():
)
class OpenAIGPTModelTester:
@require_torch
class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
)
all_generative_model_classes = (
(OpenAIGPTLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
class OpenAIGPTModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_token_type_ids = True
self.use_labels = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -197,8 +166,19 @@ class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
return config, inputs_dict
@require_torch
class OpenAIGPTModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
)
all_generative_model_classes = (
(OpenAIGPTLMHeadModel,) if is_torch_available() else ()
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
def setUp(self):
self.model_tester = OpenAIGPTModelTest.OpenAIGPTModelTester(self)
self.model_tester = OpenAIGPTModelTester(self)
self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)
def test_config(self):
...
@@ -39,70 +39,32 @@ if is_torch_available():
from transformers.modeling_utils import create_position_ids_from_input_ids
class RobertaModelTester:
@require_torch
class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
RobertaForMaskedLM,
RobertaModel,
RobertaForSequenceClassification,
RobertaForTokenClassification,
RobertaForMultipleChoice,
RobertaForQuestionAnswering,
)
if is_torch_available()
else ()
)
class RobertaModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_mask = True
self.use_token_type_ids = True
self.use_labels = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -186,16 +148,12 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
model = RobertaForTokenClassification(config=config)
model.to(torch_device)
model.eval()
loss, logits = model(
input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
)
loss, logits = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
result = {
"loss": loss,
"logits": logits,
}
self.parent.assertListEqual(
list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
)
self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels])
self.check_loss_output(result)
def create_and_check_roberta_for_multiple_choice(
@@ -257,8 +215,25 @@ class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_torch
class RobertaModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
RobertaForMaskedLM,
RobertaModel,
RobertaForSequenceClassification,
RobertaForTokenClassification,
RobertaForMultipleChoice,
RobertaForQuestionAnswering,
)
if is_torch_available()
else ()
)
def setUp(self):
self.model_tester = RobertaModelTest.RobertaModelTester(self)
self.model_tester = RobertaModelTester(self)
self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)
def test_config(self):
...
@@ -30,60 +30,28 @@ if is_torch_available():
from transformers.tokenization_t5 import T5Tokenizer
class T5ModelTester:
def __init__(self, parent):
@require_torch
class T5ModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (T5Model, T5ForConditionalGeneration) if is_torch_available() else ()
all_generative_model_classes = (T5ForConditionalGeneration,) if is_torch_available() else ()
test_pruning = False
test_torchscript = False
test_resize_embeddings = False
is_encoder_decoder = True
class T5ModelTester(object):
def __init__(
self,
parent,
batch_size=13,
encoder_seq_length=7,
decoder_seq_length=9,
is_training=True,
use_attention_mask=True,
use_labels=True,
vocab_size=99,
n_positions=14,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
dropout_rate=0.1,
initializer_factor=0.002,
eos_token_id=1,
pad_token_id=0,
decoder_start_token_id=0,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.encoder_seq_length = encoder_seq_length
self.decoder_seq_length = decoder_seq_length
self.is_training = is_training
self.use_attention_mask = use_attention_mask
self.use_labels = use_labels
self.vocab_size = vocab_size
self.n_positions = n_positions
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.d_ff = d_ff
self.relative_attention_num_buckets = relative_attention_num_buckets
self.dropout_rate = dropout_rate
self.initializer_factor = initializer_factor
self.scope = scope
self.eos_token_id = eos_token_id
self.pad_token_id = pad_token_id
self.decoder_start_token_id = decoder_start_token_id
self.batch_size = 13
self.encoder_seq_length = 7
self.decoder_seq_length = 9
self.is_training = True
self.use_attention_mask = True
self.use_labels = True
self.vocab_size = 99
self.n_positions = 14
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.d_ff = 37
self.relative_attention_num_buckets = 8
self.dropout_rate = 0.1
self.initializer_factor = 0.002
self.eos_token_id = 1
self.pad_token_id = 0
self.decoder_start_token_id = 0
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
@@ -173,9 +141,7 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
attention_mask=attention_mask,
decoder_attention_mask=decoder_attention_mask,
)
decoder_output, decoder_past, encoder_output = model(
input_ids=input_ids, decoder_input_ids=decoder_input_ids
)
decoder_output, decoder_past, encoder_output = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)
result = {
"encoder_output": encoder_output,
@@ -278,9 +244,7 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
# get two different outputs
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
output_from_past = model(
next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
)[0]
output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0]
# select random slice
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
@@ -316,14 +280,7 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(
config,
input_ids,
decoder_input_ids,
attention_mask,
decoder_attention_mask,
lm_labels,
) = config_and_inputs
(config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,) = config_and_inputs
inputs_dict = {
"input_ids": input_ids,
@@ -334,8 +291,19 @@ class T5ModelTest(ModelTesterMixin, unittest.TestCase):
}
return config, inputs_dict
@require_torch
class T5ModelTest(ModelTesterMixin, unittest.TestCase):
all_model_classes = (T5Model, T5ForConditionalGeneration) if is_torch_available() else ()
all_generative_model_classes = (T5ForConditionalGeneration,) if is_torch_available() else ()
test_pruning = False
test_torchscript = False
test_resize_embeddings = False
is_encoder_decoder = True
def setUp(self):
self.model_tester = T5ModelTest.T5ModelTester(self)
self.model_tester = T5ModelTester(self)
self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)
def test_config(self):
...
@@ -34,22 +34,7 @@ if is_tf_available():
)
class TFAlbertModelTester:
@require_tf
class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFAlbertModel,
TFAlbertForPreTraining,
TFAlbertForMaskedLM,
TFAlbertForSequenceClassification,
TFAlbertForQuestionAnswering,
)
if is_tf_available()
else ()
)
class TFAlbertModelTester(object):
def __init__(
self,
parent,
@@ -77,28 +62,28 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.embedding_size = embedding_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_mask = True
self.use_token_type_ids = True
self.use_labels = True
self.vocab_size = 99
self.embedding_size = 16
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -228,8 +213,24 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_tf
class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFAlbertModel,
TFAlbertForPreTraining,
TFAlbertForMaskedLM,
TFAlbertForSequenceClassification,
TFAlbertForQuestionAnswering,
)
if is_tf_available()
else ()
)
def setUp(self):
self.model_tester = TFAlbertModelTest.TFAlbertModelTester(self)
self.model_tester = TFAlbertModelTester(self)
self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)
def test_config(self):
...
@@ -37,25 +37,7 @@ if is_tf_available():
)
class TFBertModelTester:
@require_tf
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFBertModel,
TFBertForMaskedLM,
TFBertForNextSentencePrediction,
TFBertForPreTraining,
TFBertForQuestionAnswering,
TFBertForSequenceClassification,
TFBertForTokenClassification,
TFBertForMultipleChoice,
)
if is_tf_available()
else ()
)
class TFBertModelTester(object):
def __init__(
self,
parent,
@@ -82,27 +64,27 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_mask = True
self.use_token_type_ids = True
self.use_labels = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -240,9 +222,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
result = {
"logits": logits.numpy(),
}
self.parent.assertListEqual(
list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
)
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
def create_and_check_bert_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
@@ -271,8 +251,27 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_tf
class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFBertModel,
TFBertForMaskedLM,
TFBertForNextSentencePrediction,
TFBertForPreTraining,
TFBertForQuestionAnswering,
TFBertForSequenceClassification,
TFBertForTokenClassification,
TFBertForMultipleChoice,
)
if is_tf_available()
else ()
)
def setUp(self):
self.model_tester = TFBertModelTest.TFBertModelTester(self)
self.model_tester = TFBertModelTester(self)
self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
def test_config(self):
...
@@ -28,62 +28,33 @@ if is_tf_available():
from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST
class TFCTRLModelTester(object):
@require_tf
class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()
all_generative_model_classes = (TFCTRLLMHeadModel,) if is_tf_available() else ()
class TFCTRLModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_token_type_ids=True,
use_input_mask=True,
use_labels=True,
use_mc_token_ids=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_token_type_ids = use_token_type_ids
self.use_input_mask = use_input_mask
self.use_labels = use_labels
self.use_mc_token_ids = use_mc_token_ids
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_token_type_ids = True
self.use_input_mask = True
self.use_labels = True
self.use_mc_token_ids = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -183,8 +154,15 @@ class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_tf
class TFCTRLModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()
all_generative_model_classes = (TFCTRLLMHeadModel,) if is_tf_available() else ()
def setUp(self):
self.model_tester = TFCTRLModelTest.TFCTRLModelTester(self)
self.model_tester = TFCTRLModelTester(self)
self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)
def test_config(self):
...
@@ -32,72 +32,32 @@ if is_tf_available():
)
class TFDistilBertModelTester:
@require_tf
class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFDistilBertModel,
TFDistilBertForMaskedLM,
TFDistilBertForQuestionAnswering,
TFDistilBertForSequenceClassification,
)
if is_tf_available()
else None
)
test_pruning = True
test_torchscript = True
test_resize_embeddings = True
test_head_masking = True
class TFDistilBertModelTester(object):
def __init__(
self, parent,
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=False,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
self.batch_size = 13
self.seq_length = 7
self.is_training = True
self.use_input_mask = True
self.use_token_type_ids = False
self.use_labels = True
self.vocab_size = 99
self.hidden_size = 32
self.num_hidden_layers = 5
self.num_attention_heads = 4
self.intermediate_size = 37
self.hidden_act = "gelu"
self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = 512
self.type_vocab_size = 16
self.type_sequence_label_size = 2
self.initializer_range = 0.02
self.num_labels = 3
self.num_choices = 4
self.scope = None
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -193,8 +153,27 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict
@require_tf
class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFDistilBertModel,
TFDistilBertForMaskedLM,
TFDistilBertForQuestionAnswering,
TFDistilBertForSequenceClassification,
)
if is_tf_available()
else None
)
test_pruning = True
test_torchscript = True
test_resize_embeddings = True
test_head_masking = True
def setUp(self): def setUp(self):
self.model_tester = TFDistilBertModelTest.TFDistilBertModelTester(self) self.model_tester = TFDistilBertModelTester(self)
self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37) self.config_tester = ConfigTester(self, config_class=DistilBertConfig, dim=37)
def test_config(self): def test_config(self):
......
...@@ -32,63 +32,32 @@ if is_tf_available(): ...@@ -32,63 +32,32 @@ if is_tf_available():
) )
@require_tf class TFElectraModelTester:
class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(TFElectraModel, TFElectraForMaskedLM, TFElectraForPreTraining, TFElectraForTokenClassification,)
if is_tf_available()
else ()
)
class TFElectraModelTester(object):
def __init__( def __init__(
self, self, parent,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = 13
self.seq_length = seq_length self.seq_length = 7
self.is_training = is_training self.is_training = True
self.use_input_mask = use_input_mask self.use_input_mask = True
self.use_token_type_ids = use_token_type_ids self.use_token_type_ids = True
self.use_labels = use_labels self.use_labels = True
self.vocab_size = vocab_size self.vocab_size = 99
self.hidden_size = hidden_size self.hidden_size = 32
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = 5
self.num_attention_heads = num_attention_heads self.num_attention_heads = 4
self.intermediate_size = intermediate_size self.intermediate_size = 37
self.hidden_act = hidden_act self.hidden_act = "gelu"
self.hidden_dropout_prob = hidden_dropout_prob self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = attention_probs_dropout_prob self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = 512
self.type_vocab_size = type_vocab_size self.type_vocab_size = 16
self.type_sequence_label_size = type_sequence_label_size self.type_sequence_label_size = 2
self.initializer_range = initializer_range self.initializer_range = 0.02
self.num_labels = num_labels self.num_labels = 3
self.num_choices = num_choices self.num_choices = 4
self.scope = scope self.scope = None
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -178,9 +147,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -178,9 +147,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
...@@ -196,8 +163,18 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -196,8 +163,18 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
@require_tf
class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(TFElectraModel, TFElectraForMaskedLM, TFElectraForPreTraining, TFElectraForTokenClassification,)
if is_tf_available()
else ()
)
def setUp(self): def setUp(self):
self.model_tester = TFElectraModelTest.TFElectraModelTester(self) self.model_tester = TFElectraModelTester(self)
self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37) self.config_tester = ConfigTester(self, config_class=ElectraConfig, hidden_size=37)
def test_config(self): def test_config(self):
......
...@@ -34,64 +34,35 @@ if is_tf_available(): ...@@ -34,64 +34,35 @@ if is_tf_available():
) )
@require_tf class TFGPT2ModelTester:
class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else ()
class TFGPT2ModelTester(object):
def __init__( def __init__(
self, self, parent,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_token_type_ids=True,
use_input_mask=True,
use_labels=True,
use_mc_token_ids=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = 13
self.seq_length = seq_length self.seq_length = 7
self.is_training = is_training self.is_training = True
self.use_token_type_ids = use_token_type_ids self.use_token_type_ids = True
self.use_input_mask = use_input_mask self.use_input_mask = True
self.use_labels = use_labels self.use_labels = True
self.use_mc_token_ids = use_mc_token_ids self.use_mc_token_ids = True
self.vocab_size = vocab_size self.vocab_size = 99
self.hidden_size = hidden_size self.hidden_size = 32
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = 5
self.num_attention_heads = num_attention_heads self.num_attention_heads = 4
self.intermediate_size = intermediate_size self.intermediate_size = 37
self.hidden_act = hidden_act self.hidden_act = "gelu"
self.hidden_dropout_prob = hidden_dropout_prob self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = attention_probs_dropout_prob self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = 512
self.type_vocab_size = type_vocab_size self.type_vocab_size = 16
self.type_sequence_label_size = type_sequence_label_size self.type_sequence_label_size = 2
self.initializer_range = initializer_range self.initializer_range = 0.02
self.num_labels = num_labels self.num_labels = 3
self.num_choices = num_choices self.num_choices = 4
self.scope = scope self.scope = None
self.bos_token_id = vocab_size - 1 self.bos_token_id = self.vocab_size - 1
self.eos_token_id = vocab_size - 1 self.eos_token_id = self.vocab_size - 1
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -294,8 +265,15 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -294,8 +265,15 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
} }
return config, inputs_dict return config, inputs_dict
@require_tf
class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
all_generative_model_classes = (TFGPT2LMHeadModel,) if is_tf_available() else ()
def setUp(self): def setUp(self):
self.model_tester = TFGPT2ModelTest.TFGPT2ModelTester(self) self.model_tester = TFGPT2ModelTester(self)
self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37) self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)
def test_config(self): def test_config(self):
......
...@@ -33,66 +33,33 @@ if is_tf_available(): ...@@ -33,66 +33,33 @@ if is_tf_available():
) )
@require_tf class TFOpenAIGPTModelTester:
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
)
all_generative_model_classes = (
(TFOpenAIGPTLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
class TFOpenAIGPTModelTester(object):
def __init__( def __init__(
self, self, parent,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_token_type_ids=True,
use_input_mask=True,
use_labels=True,
use_mc_token_ids=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = 13
self.seq_length = seq_length self.seq_length = 7
self.is_training = is_training self.is_training = True
self.use_token_type_ids = use_token_type_ids self.use_token_type_ids = True
self.use_input_mask = use_input_mask self.use_input_mask = True
self.use_labels = use_labels self.use_labels = True
self.use_mc_token_ids = use_mc_token_ids self.use_mc_token_ids = True
self.vocab_size = vocab_size self.vocab_size = 99
self.hidden_size = hidden_size self.hidden_size = 32
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = 5
self.num_attention_heads = num_attention_heads self.num_attention_heads = 4
self.intermediate_size = intermediate_size self.intermediate_size = 37
self.hidden_act = hidden_act self.hidden_act = "gelu"
self.hidden_dropout_prob = hidden_dropout_prob self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = attention_probs_dropout_prob self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = 512
self.type_vocab_size = type_vocab_size self.type_vocab_size = 16
self.type_sequence_label_size = type_sequence_label_size self.type_sequence_label_size = 2
self.initializer_range = initializer_range self.initializer_range = 0.02
self.num_labels = num_labels self.num_labels = 3
self.num_choices = num_choices self.num_choices = 4
self.scope = scope self.scope = None
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -214,8 +181,19 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -214,8 +181,19 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
@require_tf
class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
)
all_generative_model_classes = (
(TFOpenAIGPTLMHeadModel,) if is_tf_available() else ()
) # TODO (PVP): Add Double HeadsModel when generate() function is changed accordingly
def setUp(self): def setUp(self):
self.model_tester = TFOpenAIGPTModelTest.TFOpenAIGPTModelTester(self) self.model_tester = TFOpenAIGPTModelTester(self)
self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37) self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)
def test_config(self): def test_config(self):
......
...@@ -36,69 +36,32 @@ if is_tf_available(): ...@@ -36,69 +36,32 @@ if is_tf_available():
) )
@require_tf class TFRobertaModelTester:
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFRobertaModel,
TFRobertaForMaskedLM,
TFRobertaForSequenceClassification,
TFRobertaForTokenClassification,
TFRobertaForQuestionAnswering,
)
if is_tf_available()
else ()
)
class TFRobertaModelTester(object):
def __init__( def __init__(
self, self, parent,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = 13
self.seq_length = seq_length self.seq_length = 7
self.is_training = is_training self.is_training = True
self.use_input_mask = use_input_mask self.use_input_mask = True
self.use_token_type_ids = use_token_type_ids self.use_token_type_ids = True
self.use_labels = use_labels self.use_labels = True
self.vocab_size = vocab_size self.vocab_size = 99
self.hidden_size = hidden_size self.hidden_size = 32
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = 5
self.num_attention_heads = num_attention_heads self.num_attention_heads = 4
self.intermediate_size = intermediate_size self.intermediate_size = 37
self.hidden_act = hidden_act self.hidden_act = "gelu"
self.hidden_dropout_prob = hidden_dropout_prob self.hidden_dropout_prob = 0.1
self.attention_probs_dropout_prob = attention_probs_dropout_prob self.attention_probs_dropout_prob = 0.1
self.max_position_embeddings = max_position_embeddings self.max_position_embeddings = 512
self.type_vocab_size = type_vocab_size self.type_vocab_size = 16
self.type_sequence_label_size = type_sequence_label_size self.type_sequence_label_size = 2
self.initializer_range = initializer_range self.initializer_range = 0.02
self.num_labels = num_labels self.num_labels = 3
self.num_choices = num_choices self.num_choices = 4
self.scope = scope self.scope = None
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -176,9 +139,7 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -176,9 +139,7 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
)
def create_and_check_roberta_for_question_answering( def create_and_check_roberta_for_question_answering(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
...@@ -207,8 +168,24 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -207,8 +168,24 @@ class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask} inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
@require_tf
class TFRobertaModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (
(
TFRobertaModel,
TFRobertaForMaskedLM,
TFRobertaForSequenceClassification,
TFRobertaForTokenClassification,
TFRobertaForQuestionAnswering,
)
if is_tf_available()
else ()
)
def setUp(self): def setUp(self):
self.model_tester = TFRobertaModelTest.TFRobertaModelTester(self) self.model_tester = TFRobertaModelTester(self)
self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37) self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)
def test_config(self): def test_config(self):
......
...@@ -28,53 +28,28 @@ if is_tf_available(): ...@@ -28,53 +28,28 @@ if is_tf_available():
from transformers import TFT5Model, TFT5ForConditionalGeneration, T5Tokenizer from transformers import TFT5Model, TFT5ForConditionalGeneration, T5Tokenizer
@require_tf class TFT5ModelTester:
class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
is_encoder_decoder = True
all_model_classes = (TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else ()
all_generative_model_classes = (TFT5ForConditionalGeneration,) if is_tf_available() else ()
class TFT5ModelTester(object):
def __init__( def __init__(
self, self, parent,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_labels=True,
vocab_size=99,
n_positions=14,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
dropout_rate=0.1,
initializer_factor=0.002,
eos_token_id=1,
pad_token_id=0,
scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = 13
self.seq_length = seq_length self.seq_length = 7
self.is_training = is_training self.is_training = True
self.use_input_mask = use_input_mask self.use_input_mask = True
self.use_labels = use_labels self.use_labels = True
self.vocab_size = vocab_size self.vocab_size = 99
self.n_positions = n_positions self.n_positions = 14
self.hidden_size = hidden_size self.hidden_size = 32
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = 5
self.num_attention_heads = num_attention_heads self.num_attention_heads = 4
self.d_ff = d_ff self.d_ff = 37
self.relative_attention_num_buckets = relative_attention_num_buckets self.relative_attention_num_buckets = 8
self.dropout_rate = dropout_rate self.dropout_rate = 0.1
self.initializer_factor = initializer_factor self.initializer_factor = 0.002
self.eos_token_id = eos_token_id self.eos_token_id = 1
self.pad_token_id = pad_token_id self.pad_token_id = 0
self.scope = scope self.scope = None
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -211,9 +186,7 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -211,9 +186,7 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
# get two different outputs # get two different outputs
output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0] output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
output_from_past = model( output_from_past = model(next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask)[0]
next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
)[0]
# select random slice # select random slice
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item() random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).numpy().item()
...@@ -234,8 +207,16 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -234,8 +207,16 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
} }
return config, inputs_dict return config, inputs_dict
@require_tf
class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
is_encoder_decoder = True
all_model_classes = (TFT5Model, TFT5ForConditionalGeneration) if is_tf_available() else ()
all_generative_model_classes = (TFT5ForConditionalGeneration,) if is_tf_available() else ()
def setUp(self): def setUp(self):
self.model_tester = TFT5ModelTest.TFT5ModelTester(self) self.model_tester = TFT5ModelTester(self)
self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37) self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)
def test_config(self): def test_config(self):
......
...@@ -33,59 +33,30 @@ if is_tf_available(): ...@@ -33,59 +33,30 @@ if is_tf_available():
) )
@require_tf class TFTransfoXLModelTester:
class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFTransfoXLModel, TFTransfoXLLMHeadModel) if is_tf_available() else ()
all_generative_model_classes = () if is_tf_available() else ()
# TODO: add this test when TFTransfoXLLMHead has a linear output layer implemented
test_pruning = False
test_torchscript = False
test_resize_embeddings = False
class TFTransfoXLModelTester(object):
def __init__( def __init__(
self, self, parent,
parent,
batch_size=13,
seq_length=7,
mem_len=30,
clamp_len=15,
is_training=True,
use_labels=True,
vocab_size=99,
cutoffs=[10, 50, 80],
hidden_size=32,
d_embed=32,
num_attention_heads=4,
d_head=8,
d_inner=128,
div_val=2,
num_hidden_layers=5,
scope=None,
seed=1,
eos_token_id=0,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = 13
self.seq_length = seq_length self.seq_length = 7
self.mem_len = mem_len self.mem_len = 30
self.key_length = seq_length + mem_len self.key_length = self.seq_length + self.mem_len
self.clamp_len = clamp_len self.clamp_len = 15
self.is_training = is_training self.is_training = True
self.use_labels = use_labels self.use_labels = True
self.vocab_size = vocab_size self.vocab_size = 99
self.cutoffs = cutoffs self.cutoffs = [10, 50, 80]
self.hidden_size = hidden_size self.hidden_size = 32
self.d_embed = d_embed self.d_embed = 32
self.num_attention_heads = num_attention_heads self.num_attention_heads = 4
self.d_head = d_head self.d_head = 8
self.d_inner = d_inner self.d_inner = 128
self.div_val = div_val self.div_val = 2
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = 5
self.scope = scope self.scope = None
self.seed = seed self.seed = 1
self.eos_token_id = eos_token_id self.eos_token_id = 0
def prepare_config_and_inputs(self): def prepare_config_and_inputs(self):
input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) input_ids_1 = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
...@@ -190,8 +161,19 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -190,8 +161,19 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
inputs_dict = {"input_ids": input_ids_1} inputs_dict = {"input_ids": input_ids_1}
return config, inputs_dict return config, inputs_dict
@require_tf
class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TFTransfoXLModel, TFTransfoXLLMHeadModel) if is_tf_available() else ()
all_generative_model_classes = () if is_tf_available() else ()
# TODO: add this test when TFTransfoXLLMHead has a linear output layer implemented
test_pruning = False
test_torchscript = False
test_resize_embeddings = False
def setUp(self): def setUp(self):
self.model_tester = TFTransfoXLModelTest.TFTransfoXLModelTester(self) self.model_tester = TFTransfoXLModelTester(self)
self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37) self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
def test_config(self): def test_config(self):
......
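For readers skimming the diff, the same refactor repeats in every file above: the nested *ModelTester helper becomes a module-level class, its long keyword-argument __init__ collapses into hard-coded attributes, and setUp() instantiates the module-level class directly instead of going through the test case. Below is a minimal, self-contained sketch of that pattern under assumed names; TFFooModelTester, TFFooModelTest, and the toy config dict are hypothetical illustrations, not part of the transformers test suite.

import unittest


class TFFooModelTester:
    # hoisted to module level; the former keyword arguments are now fixed attributes
    def __init__(self, parent):
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.vocab_size = 99

    def prepare_config_and_inputs(self):
        # build a toy (config, inputs) pair for the owning test case
        config = {"vocab_size": self.vocab_size}
        inputs_dict = {"input_ids": [[0] * self.seq_length for _ in range(self.batch_size)]}
        return config, inputs_dict


class TFFooModelTest(unittest.TestCase):
    def setUp(self):
        # before the cleanup this read: TFFooModelTest.TFFooModelTester(self)
        self.model_tester = TFFooModelTester(self)

    def test_inputs_shape(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs()
        self.assertEqual(len(inputs_dict["input_ids"]), self.model_tester.batch_size)


if __name__ == "__main__":
    unittest.main()

Keeping the tester at module level lets other test modules import and reuse it, which is the stated point of this cleanup.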