Unverified commit bd90cda9, authored by Yih-Dar, committed by GitHub

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
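The diff below lowers `num_hidden_layers` (and the analogous depth arguments such as `decoder_layers`) in the per-model tester classes, so the tiny models instantiated during CI are only two transformer layers deep. As a rough sketch of the effect, and not part of this commit, the snippet below builds a tiny model the way these testers do; `RobertaConfig` and `RobertaModel` are the real Hugging Face classes, and the keyword values simply mirror the tester defaults shown in the diff:

```python
# Minimal sketch, not part of this commit: building a tiny RoBERTa model the
# way the tester classes below do, with the new depth of two hidden layers.
from transformers import RobertaConfig, RobertaModel

config = RobertaConfig(
    vocab_size=99,
    hidden_size=32,
    num_hidden_layers=2,  # previously 5 in most testers
    num_attention_heads=4,
    intermediate_size=37,
    hidden_act="gelu",
)
model = RobertaModel(config)
# Fewer encoder layers -> fewer parameters and faster forward passes in CI.
print(sum(p.numel() for p in model.parameters()))
```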
@@ -46,7 +46,7 @@ class FlaxRobertaModelTester(unittest.TestCase):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -58,7 +58,7 @@ class RobertaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -49,7 +49,7 @@ class FlaxRobertaPreLayerNormModelTester(unittest.TestCase):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -57,7 +57,7 @@ class RobertaPreLayerNormModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -58,7 +58,7 @@ class RoCBertModelTester:
         pronunciation_embed_dim=32,
         shape_embed_dim=32,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -47,7 +47,7 @@ class FlaxRoFormerModelTester(unittest.TestCase):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -56,7 +56,7 @@ class RoFormerModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -52,7 +52,7 @@ class RwkvModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         intermediate_size=37,
         hidden_act="gelu",
         hidden_dropout_prob=0.1,
......
@@ -65,7 +65,7 @@ class SEWModelTester:
         num_conv_pos_embeddings=31,
         num_conv_pos_embedding_groups=2,
         squeeze_factor=2,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         hidden_dropout=0.1,
         intermediate_size=20,
......
@@ -72,7 +72,7 @@ class SEWDModelTester:
         position_biased_input=False,
         pos_att_type=("p2c", "c2p"),
         norm_rel_ebd="layer_norm",
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         hidden_dropout=0.1,
         intermediate_size=20,
......
@@ -50,7 +50,7 @@ class Speech2Text2StandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         decoder_attention_heads=4,
         max_position_embeddings=30,
         pad_token_id=0,
......
@@ -105,7 +105,7 @@ class SpeechT5ModelTester:
         is_training=False,
         vocab_size=81,
         hidden_size=24,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         intermediate_size=4,
     ):
@@ -249,7 +249,7 @@ class SpeechT5ForSpeechToTextTester:
         decoder_seq_length=7,
         is_training=False,
         hidden_size=24,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         intermediate_size=4,
         conv_dim=(32, 32, 32),
@@ -786,7 +786,7 @@ class SpeechT5ForTextToSpeechTester:
         decoder_seq_length=1024,  # speech is longer
         is_training=False,
         hidden_size=24,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         intermediate_size=4,
         vocab_size=81,
@@ -1031,7 +1031,7 @@ class SpeechT5ForSpeechToSpeechTester:
         decoder_seq_length=1024,
         is_training=False,
         hidden_size=24,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         intermediate_size=4,
         conv_dim=(32, 32, 32),
......
@@ -46,7 +46,7 @@ class SplinterModelTester:
         vocab_size=99,
         hidden_size=32,
         question_token_id=1,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -50,7 +50,7 @@ class SqueezeBertModelTester(object):
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=64,
         hidden_act="gelu",
......
@@ -58,7 +58,7 @@ class SwitchTransformersModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -826,7 +826,7 @@ class SwitchTransformersEncoderOnlyModelTester:
         # For common tests
         use_attention_mask=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
......
@@ -70,7 +70,7 @@ class FlaxT5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -477,7 +477,7 @@ class FlaxT5EncoderOnlyModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
......
@@ -71,7 +71,7 @@ class T5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -902,7 +902,7 @@ class T5EncoderOnlyModelTester:
         # For common tests
         use_attention_mask=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
......
@@ -79,7 +79,7 @@ class TapasModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -60,7 +60,7 @@ class TimesformerModelTester:
         is_training=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
......
@@ -52,7 +52,7 @@ class TransfoXLModelTester:
         d_head=8,
         d_inner=128,
         div_val=2,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         scope=None,
         seed=1,
         eos_token_id=0,
......