"examples/flax/vscode:/vscode.git/clone" did not exist on "fc63914399b6f60512c720959f9182b02ae4a45c"
Unverified Commit f69eb24b authored by Yih-Dar, committed by GitHub

Improve model tester (#19984)



* part 1

* part 2

* part 3

* fix

* For CANINE

* For ESMFold
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 74877437
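
Every tester in this diff is refactored the same way: values that used to be hard-coded inside `__init__` become keyword arguments whose defaults are the old values, and the body simply stores those arguments on `self`. Existing tests keep their exact behavior, while an individual test can now override a setting without copying or subclassing the tester. A minimal usage sketch (illustrative only, not part of the diff; it assumes the tester is created inside a `unittest.TestCase`, which serves as `parent`):

    # Defaults are unchanged, so existing call sites behave exactly as before:
    tester = AlbertModelTester(self)

    # New with this change: override selected settings per test.
    small_tester = AlbertModelTester(self, batch_size=2, seq_length=5)
    config_and_inputs = small_tester.prepare_config_and_inputs()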
@@ -44,31 +44,54 @@ class AlbertModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_mask=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        embedding_size=16,
+        hidden_size=36,
+        num_hidden_layers=6,
+        num_hidden_groups=6,
+        num_attention_heads=6,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_mask = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.embedding_size = 16
-        self.hidden_size = 36
-        self.num_hidden_layers = 6
-        self.num_hidden_groups = 6
-        self.num_attention_heads = 6
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.embedding_size = embedding_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_hidden_groups = num_hidden_groups
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -48,6 +48,8 @@ class CanineModelTester:
         use_input_mask=True,
         use_token_type_ids=True,
         use_labels=True,
+        # let's use a vocab size that's way bigger than BERT's one
+        vocab_size=100000,
         hidden_size=32,
         num_hidden_layers=5,
         num_attention_heads=4,
@@ -70,6 +72,7 @@ class CanineModelTester:
         self.use_input_mask = use_input_mask
         self.use_token_type_ids = use_token_type_ids
         self.use_labels = use_labels
+        self.vocab_size = vocab_size
         self.hidden_size = hidden_size
         self.num_hidden_layers = num_hidden_layers
         self.num_attention_heads = num_attention_heads
@@ -86,8 +89,7 @@ class CanineModelTester:
         self.scope = scope
 
     def prepare_config_and_inputs(self):
-        # let's use a vocab size that's way bigger than BERT's one
-        input_ids = ids_tensor([self.batch_size, self.seq_length], 100000)
+        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
 
         input_mask = None
         if self.use_input_mask:
@@ -39,30 +39,52 @@ class CTRLModelTester:
     def __init__(
         self,
         parent,
+        batch_size=14,
+        seq_length=7,
+        is_training=True,
+        use_token_type_ids=True,
+        use_input_mask=True,
+        use_labels=True,
+        use_mc_token_ids=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 14
-        self.seq_length = 7
-        self.is_training = True
-        self.use_token_type_ids = True
-        self.use_input_mask = True
-        self.use_labels = True
-        self.use_mc_token_ids = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_token_type_ids = use_token_type_ids
+        self.use_input_mask = use_input_mask
+        self.use_labels = use_labels
+        self.use_mc_token_ids = use_mc_token_ids
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
         self.pad_token_id = self.vocab_size - 1
 
     def prepare_config_and_inputs(self):
@@ -48,29 +48,50 @@ class Data2VecTextModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_mask=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_mask = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -45,29 +45,50 @@ class ElectraModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_mask=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_mask = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -40,29 +40,50 @@ class EsmModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=False,
+        use_input_mask=True,
+        use_token_type_ids=False,
+        use_labels=True,
+        vocab_size=33,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = False
-        self.use_input_mask = True
-        self.use_token_type_ids = False
-        self.use_labels = True
-        self.vocab_size = 33
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -34,29 +34,50 @@ class EsmFoldModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=False,
+        use_input_mask=True,
+        use_token_type_ids=False,
+        use_labels=False,
+        vocab_size=19,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = False
-        self.use_input_mask = True
-        self.use_token_type_ids = False
-        self.use_labels = False
-        self.vocab_size = 19
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -42,35 +42,62 @@ class FlaubertModelTester(object):
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_lengths=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        gelu_activation=True,
+        sinusoidal_embeddings=False,
+        causal=False,
+        asm=False,
+        n_langs=2,
+        vocab_size=99,
+        n_special=0,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=12,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        summary_type="last",
+        use_proj=None,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_lengths = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.gelu_activation = True
-        self.sinusoidal_embeddings = False
-        self.causal = False
-        self.asm = False
-        self.n_langs = 2
-        self.vocab_size = 99
-        self.n_special = 0
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 12
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.summary_type = "last"
-        self.use_proj = None
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_lengths = use_input_lengths
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.gelu_activation = gelu_activation
+        self.sinusoidal_embeddings = sinusoidal_embeddings
+        self.causal = causal
+        self.asm = asm
+        self.n_langs = n_langs
+        self.vocab_size = vocab_size
+        self.n_special = n_special
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.summary_type = summary_type
+        self.use_proj = use_proj
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -46,26 +46,44 @@ class FSMTModelTester:
     def __init__(
         self,
         parent,
+        src_vocab_size=99,
+        tgt_vocab_size=99,
+        langs=["ru", "en"],
+        batch_size=13,
+        seq_length=7,
+        is_training=False,
+        use_labels=False,
+        hidden_size=16,
+        num_hidden_layers=2,
+        num_attention_heads=4,
+        intermediate_size=4,
+        hidden_act="relu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=20,
+        bos_token_id=0,
+        pad_token_id=1,
+        eos_token_id=2,
     ):
         self.parent = parent
-        self.src_vocab_size = 99
-        self.tgt_vocab_size = 99
-        self.langs = ["ru", "en"]
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = False
-        self.use_labels = False
-        self.hidden_size = 16
-        self.num_hidden_layers = 2
-        self.num_attention_heads = 4
-        self.intermediate_size = 4
-        self.hidden_act = "relu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 20
-        self.bos_token_id = 0
-        self.pad_token_id = 1
-        self.eos_token_id = 2
+        self.src_vocab_size = src_vocab_size
+        self.tgt_vocab_size = tgt_vocab_size
+        self.langs = langs
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_labels = use_labels
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.bos_token_id = bos_token_id
+        self.pad_token_id = pad_token_id
+        self.eos_token_id = eos_token_id
         torch.manual_seed(0)
 
         # hack needed for modeling_common tests - despite not really having this attribute in this model
@@ -53,29 +53,50 @@ class IBertModelTester:
     def __init__(
        self,
        parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_mask=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_mask = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -41,30 +41,52 @@ class LongformerModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_mask=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
+        attention_window=4,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_mask = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
-        self.attention_window = 4
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
+        self.attention_window = attention_window
 
         # `ModelTesterMixin.test_attention_outputs` is expecting attention tensors to be of size
         # [num_attention_heads, encoder_seq_length, encoder_key_length], but LongformerSelfAttention
@@ -41,28 +41,48 @@ class OpenAIGPTModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
         self.pad_token_id = self.vocab_size - 1
 
     def prepare_config_and_inputs(self):
@@ -50,29 +50,50 @@ class RobertaModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_mask=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_mask = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -37,28 +37,47 @@ class TransfoXLModelTester:
     def __init__(
         self,
         parent,
+        batch_size=14,
+        seq_length=7,
+        mem_len=30,
+        clamp_len=15,
+        is_training=False,
+        use_labels=True,
+        vocab_size=99,
+        cutoffs=[10, 50, 80],
+        hidden_size=32,
+        d_embed=32,
+        num_attention_heads=4,
+        d_head=8,
+        d_inner=128,
+        div_val=2,
+        num_hidden_layers=5,
+        scope=None,
+        seed=1,
+        eos_token_id=0,
+        num_labels=3,
     ):
         self.parent = parent
-        self.batch_size = 14
-        self.seq_length = 7
-        self.mem_len = 30
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.mem_len = mem_len
         self.key_length = self.seq_length + self.mem_len
-        self.clamp_len = 15
-        self.is_training = False
-        self.use_labels = True
-        self.vocab_size = 99
-        self.cutoffs = [10, 50, 80]
-        self.hidden_size = 32
-        self.d_embed = 32
-        self.num_attention_heads = 4
-        self.d_head = 8
-        self.d_inner = 128
-        self.div_val = 2
-        self.num_hidden_layers = 5
-        self.scope = None
-        self.seed = 1
-        self.eos_token_id = 0
-        self.num_labels = 3
+        self.clamp_len = clamp_len
+        self.is_training = is_training
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.cutoffs = cutoffs
+        self.hidden_size = hidden_size
+        self.d_embed = d_embed
+        self.num_attention_heads = num_attention_heads
+        self.d_head = d_head
+        self.d_inner = d_inner
+        self.div_val = div_val
+        self.num_hidden_layers = num_hidden_layers
+        self.scope = scope
+        self.seed = seed
+        self.eos_token_id = eos_token_id
+        self.num_labels = num_labels
         self.pad_token_id = self.vocab_size - 1
 
     def prepare_config_and_inputs(self):
@@ -42,35 +42,62 @@ class XLMModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_lengths=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        gelu_activation=True,
+        sinusoidal_embeddings=False,
+        causal=False,
+        asm=False,
+        n_langs=2,
+        vocab_size=99,
+        n_special=0,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=2,
+        num_choices=4,
+        summary_type="last",
+        use_proj=True,
+        scope=None,
+        bos_token_id=0,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_lengths = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.gelu_activation = True
-        self.sinusoidal_embeddings = False
-        self.causal = False
-        self.asm = False
-        self.n_langs = 2
-        self.vocab_size = 99
-        self.n_special = 0
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 2
-        self.num_choices = 4
-        self.summary_type = "last"
-        self.use_proj = True
-        self.scope = None
-        self.bos_token_id = 0
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_lengths = use_input_lengths
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.gelu_activation = gelu_activation
+        self.sinusoidal_embeddings = sinusoidal_embeddings
+        self.causal = causal
+        self.asm = asm
+        self.n_langs = n_langs
+        self.vocab_size = vocab_size
+        self.n_special = n_special
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.summary_type = summary_type
+        self.use_proj = use_proj
+        self.scope = scope
+        self.bos_token_id = bos_token_id
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
@@ -46,29 +46,50 @@ class XLMRobertaXLModelTester:
     def __init__(
         self,
         parent,
+        batch_size=13,
+        seq_length=7,
+        is_training=True,
+        use_input_mask=True,
+        use_token_type_ids=True,
+        use_labels=True,
+        vocab_size=99,
+        hidden_size=32,
+        num_hidden_layers=5,
+        num_attention_heads=4,
+        intermediate_size=37,
+        hidden_act="gelu",
+        hidden_dropout_prob=0.1,
+        attention_probs_dropout_prob=0.1,
+        max_position_embeddings=512,
+        type_vocab_size=16,
+        type_sequence_label_size=2,
+        initializer_range=0.02,
+        num_labels=3,
+        num_choices=4,
+        scope=None,
     ):
         self.parent = parent
-        self.batch_size = 13
-        self.seq_length = 7
-        self.is_training = True
-        self.use_input_mask = True
-        self.use_token_type_ids = True
-        self.use_labels = True
-        self.vocab_size = 99
-        self.hidden_size = 32
-        self.num_hidden_layers = 5
-        self.num_attention_heads = 4
-        self.intermediate_size = 37
-        self.hidden_act = "gelu"
-        self.hidden_dropout_prob = 0.1
-        self.attention_probs_dropout_prob = 0.1
-        self.max_position_embeddings = 512
-        self.type_vocab_size = 16
-        self.type_sequence_label_size = 2
-        self.initializer_range = 0.02
-        self.num_labels = 3
-        self.num_choices = 4
-        self.scope = None
+        self.batch_size = batch_size
+        self.seq_length = seq_length
+        self.is_training = is_training
+        self.use_input_mask = use_input_mask
+        self.use_token_type_ids = use_token_type_ids
+        self.use_labels = use_labels
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.intermediate_size = intermediate_size
+        self.hidden_act = hidden_act
+        self.hidden_dropout_prob = hidden_dropout_prob
+        self.attention_probs_dropout_prob = attention_probs_dropout_prob
+        self.max_position_embeddings = max_position_embeddings
+        self.type_vocab_size = type_vocab_size
+        self.type_sequence_label_size = type_sequence_label_size
+        self.initializer_range = initializer_range
+        self.num_labels = num_labels
+        self.num_choices = num_choices
+        self.scope = scope
 
     def prepare_config_and_inputs(self):
         input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)