Unverified commit bd90cda9 authored by Yih-Dar, committed by GitHub
Browse files

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
......@@ -46,7 +46,7 @@ class FlaxRobertaModelTester(unittest.TestCase):
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -58,7 +58,7 @@ class RobertaModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -49,7 +49,7 @@ class FlaxRobertaPreLayerNormModelTester(unittest.TestCase):
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -57,7 +57,7 @@ class RobertaPreLayerNormModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -58,7 +58,7 @@ class RoCBertModelTester:
pronunciation_embed_dim=32,
shape_embed_dim=32,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -47,7 +47,7 @@ class FlaxRoFormerModelTester(unittest.TestCase):
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -56,7 +56,7 @@ class RoFormerModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -52,7 +52,7 @@ class RwkvModelTester:
use_mc_token_ids=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
......
......@@ -65,7 +65,7 @@ class SEWModelTester:
num_conv_pos_embeddings=31,
num_conv_pos_embedding_groups=2,
squeeze_factor=2,
num_hidden_layers=4,
num_hidden_layers=2,
num_attention_heads=2,
hidden_dropout=0.1,
intermediate_size=20,
......
......@@ -72,7 +72,7 @@ class SEWDModelTester:
position_biased_input=False,
pos_att_type=("p2c", "c2p"),
norm_rel_ebd="layer_norm",
num_hidden_layers=4,
num_hidden_layers=2,
num_attention_heads=2,
hidden_dropout=0.1,
intermediate_size=20,
......
......@@ -50,7 +50,7 @@ class Speech2Text2StandaloneDecoderModelTester:
use_labels=True,
decoder_start_token_id=2,
decoder_ffn_dim=32,
decoder_layers=4,
decoder_layers=2,
decoder_attention_heads=4,
max_position_embeddings=30,
pad_token_id=0,
......
......@@ -105,7 +105,7 @@ class SpeechT5ModelTester:
is_training=False,
vocab_size=81,
hidden_size=24,
num_hidden_layers=4,
num_hidden_layers=2,
num_attention_heads=2,
intermediate_size=4,
):
......@@ -249,7 +249,7 @@ class SpeechT5ForSpeechToTextTester:
decoder_seq_length=7,
is_training=False,
hidden_size=24,
num_hidden_layers=4,
num_hidden_layers=2,
num_attention_heads=2,
intermediate_size=4,
conv_dim=(32, 32, 32),
......@@ -786,7 +786,7 @@ class SpeechT5ForTextToSpeechTester:
decoder_seq_length=1024, # speech is longer
is_training=False,
hidden_size=24,
num_hidden_layers=4,
num_hidden_layers=2,
num_attention_heads=2,
intermediate_size=4,
vocab_size=81,
......@@ -1031,7 +1031,7 @@ class SpeechT5ForSpeechToSpeechTester:
decoder_seq_length=1024,
is_training=False,
hidden_size=24,
num_hidden_layers=4,
num_hidden_layers=2,
num_attention_heads=2,
intermediate_size=4,
conv_dim=(32, 32, 32),
......
......@@ -46,7 +46,7 @@ class SplinterModelTester:
vocab_size=99,
hidden_size=32,
question_token_id=1,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -50,7 +50,7 @@ class SqueezeBertModelTester(object):
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=64,
hidden_act="gelu",
......
......@@ -58,7 +58,7 @@ class SwitchTransformersModelTester:
use_attention_mask=True,
use_labels=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
......@@ -826,7 +826,7 @@ class SwitchTransformersEncoderOnlyModelTester:
# For common tests
use_attention_mask=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
......
......@@ -70,7 +70,7 @@ class FlaxT5ModelTester:
use_attention_mask=True,
use_labels=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
......@@ -477,7 +477,7 @@ class FlaxT5EncoderOnlyModelTester:
use_attention_mask=True,
use_labels=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
......
......@@ -71,7 +71,7 @@ class T5ModelTester:
use_attention_mask=True,
use_labels=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
......@@ -902,7 +902,7 @@ class T5EncoderOnlyModelTester:
# For common tests
use_attention_mask=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
d_ff=37,
relative_attention_num_buckets=8,
......
......@@ -79,7 +79,7 @@ class TapasModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -60,7 +60,7 @@ class TimesformerModelTester:
is_training=True,
use_labels=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -52,7 +52,7 @@ class TransfoXLModelTester:
d_head=8,
d_inner=128,
div_val=2,
num_hidden_layers=5,
num_hidden_layers=2,
scope=None,
seed=1,
eos_token_id=0,
......
Markdown is supported
Attach a file by drag & drop or click to upload.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment