Unverified commit bd90cda9, authored by Yih-Dar and committed by GitHub
Browse files

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
...@@ -47,7 +47,7 @@ class TrOCRStandaloneDecoderModelTester: ...@@ -47,7 +47,7 @@ class TrOCRStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
pad_token_id=0, pad_token_id=0,
......
...@@ -68,7 +68,7 @@ class TvltModelTester: ...@@ -68,7 +68,7 @@ class TvltModelTester:
num_audio_channels=1, num_audio_channels=1,
num_frames=2, num_frames=2,
hidden_size=32, hidden_size=32,
num_hidden_layers=3, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=128, intermediate_size=128,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -64,7 +64,7 @@ class UMT5ModelTester: ...@@ -64,7 +64,7 @@ class UMT5ModelTester:
use_attention_mask=True, use_attention_mask=True,
use_labels=False, use_labels=False,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
d_ff=37, d_ff=37,
relative_attention_num_buckets=8, relative_attention_num_buckets=8,
......
...@@ -65,7 +65,7 @@ class UniSpeechModelTester: ...@@ -65,7 +65,7 @@ class UniSpeechModelTester:
conv_bias=False, conv_bias=False,
num_conv_pos_embeddings=16, num_conv_pos_embeddings=16,
num_conv_pos_embedding_groups=2, num_conv_pos_embedding_groups=2,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
hidden_dropout_prob=0.1, # this is most likely not correctly set yet hidden_dropout_prob=0.1, # this is most likely not correctly set yet
intermediate_size=20, intermediate_size=20,
......
...@@ -67,7 +67,7 @@ class UniSpeechSatModelTester: ...@@ -67,7 +67,7 @@ class UniSpeechSatModelTester:
conv_bias=False, conv_bias=False,
num_conv_pos_embeddings=16, num_conv_pos_embeddings=16,
num_conv_pos_embedding_groups=2, num_conv_pos_embedding_groups=2,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
hidden_dropout_prob=0.1, # this is most likely not correctly set yet hidden_dropout_prob=0.1, # this is most likely not correctly set yet
intermediate_size=20, intermediate_size=20,
......
...@@ -62,7 +62,7 @@ class VideoMAEModelTester: ...@@ -62,7 +62,7 @@ class VideoMAEModelTester:
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -65,7 +65,7 @@ class ViltModelTester: ...@@ -65,7 +65,7 @@ class ViltModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -54,7 +54,7 @@ class VisualBertModelTester: ...@@ -54,7 +54,7 @@ class VisualBertModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -41,7 +41,7 @@ class FlaxViTModelTester(unittest.TestCase): ...@@ -41,7 +41,7 @@ class FlaxViTModelTester(unittest.TestCase):
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -59,7 +59,7 @@ class ViTModelTester: ...@@ -59,7 +59,7 @@ class ViTModelTester:
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -50,7 +50,7 @@ class ViTHybridModelTester: ...@@ -50,7 +50,7 @@ class ViTHybridModelTester:
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -56,7 +56,7 @@ class ViTMAEModelTester: ...@@ -56,7 +56,7 @@ class ViTMAEModelTester:
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -52,7 +52,7 @@ class ViTMSNModelTester: ...@@ -52,7 +52,7 @@ class ViTMSNModelTester:
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -123,7 +123,7 @@ class FlaxWav2Vec2ModelTester: ...@@ -123,7 +123,7 @@ class FlaxWav2Vec2ModelTester:
conv_bias=False, conv_bias=False,
num_conv_pos_embeddings=16, num_conv_pos_embeddings=16,
num_conv_pos_embedding_groups=2, num_conv_pos_embedding_groups=2,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
hidden_dropout_prob=0.1, # this is most likely not correctly set yet hidden_dropout_prob=0.1, # this is most likely not correctly set yet
intermediate_size=20, intermediate_size=20,
......
...@@ -153,7 +153,7 @@ class Wav2Vec2ModelTester: ...@@ -153,7 +153,7 @@ class Wav2Vec2ModelTester:
conv_bias=False, conv_bias=False,
num_conv_pos_embeddings=16, num_conv_pos_embeddings=16,
num_conv_pos_embedding_groups=2, num_conv_pos_embedding_groups=2,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
hidden_dropout_prob=0.1, # this is most likely not correctly set yet hidden_dropout_prob=0.1, # this is most likely not correctly set yet
intermediate_size=20, intermediate_size=20,
......
...@@ -71,7 +71,7 @@ class Wav2Vec2ConformerModelTester: ...@@ -71,7 +71,7 @@ class Wav2Vec2ConformerModelTester:
conv_bias=False, conv_bias=False,
num_conv_pos_embeddings=16, num_conv_pos_embeddings=16,
num_conv_pos_embedding_groups=2, num_conv_pos_embedding_groups=2,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
intermediate_size=20, intermediate_size=20,
......
...@@ -64,7 +64,7 @@ class WavLMModelTester: ...@@ -64,7 +64,7 @@ class WavLMModelTester:
conv_bias=False, conv_bias=False,
num_conv_pos_embeddings=16, num_conv_pos_embeddings=16,
num_conv_pos_embedding_groups=2, num_conv_pos_embedding_groups=2,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
hidden_dropout_prob=0.1, # this is most likely not correctly set yet hidden_dropout_prob=0.1, # this is most likely not correctly set yet
intermediate_size=20, intermediate_size=20,
......
...@@ -61,7 +61,7 @@ class XCLIPVisionModelTester: ...@@ -61,7 +61,7 @@ class XCLIPVisionModelTester:
num_frames=8, # important; the batch size * time must be divisible by the number of frames num_frames=8, # important; the batch size * time must be divisible by the number of frames
is_training=True, is_training=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
mit_hidden_size=64, mit_hidden_size=64,
...@@ -318,7 +318,7 @@ class XCLIPTextModelTester: ...@@ -318,7 +318,7 @@ class XCLIPTextModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
......
...@@ -53,7 +53,7 @@ class FlaxXGLMModelTester: ...@@ -53,7 +53,7 @@ class FlaxXGLMModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
d_model=32, d_model=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
ffn_dim=37, ffn_dim=37,
activation_function="gelu", activation_function="gelu",
......
...@@ -44,7 +44,7 @@ class XGLMModelTester: ...@@ -44,7 +44,7 @@ class XGLMModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
d_model=32, d_model=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
ffn_dim=37, ffn_dim=37,
activation_function="gelu", activation_function="gelu",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment