CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)

* CI with layers=2

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)
* CI with layers=2

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
bd90cda9 · Yih-Dar · GitHub · b28ebb26 · bd90cda9 · bd90cda9
Unverified commit bd90cda9, authored Aug 02, 2023 by Yih-Dar; committed by GitHub on Aug 02, 2023.
20 changed files
--- a/tests/models/albert/test_modeling_albert.py
+++ b/tests/models/albert/test_modeling_albert.py
@@ -54,8 +54,9 @@ class AlbertModelTester:
        vocab_size=99,
        embedding_size=16,
        hidden_size=36,
-        num_hidden_layers=6,
+        num_hidden_layers=2,
-        num_hidden_groups=6,
+        # this needs to be the same as `num_hidden_layers`!
+        num_hidden_groups=2,
        num_attention_heads=6,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/albert/test_modeling_flax_albert.py
+++ b/tests/models/albert/test_modeling_flax_albert.py
@@ -48,7 +48,7 @@ class FlaxAlbertModelTester(unittest.TestCase):
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/align/test_modeling_align.py
+++ b/tests/models/align/test_modeling_align.py
@@ -242,7 +242,7 @@ class AlignTextModelTester:
        use_token_type_ids=True,
        vocab_size=99,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/altclip/test_modeling_altclip.py
+++ b/tests/models/altclip/test_modeling_altclip.py
@@ -60,7 +60,7 @@ class AltCLIPVisionModelTester:
        is_training=True,
        hidden_size=32,
        projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        dropout=0.1,
@@ -212,7 +212,7 @@ class AltCLIPTextModelTester:
        hidden_size=32,
        projection_dim=32,
        project_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        dropout=0.1,

--- a/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py
+++ b/tests/models/audio_spectrogram_transformer/test_modeling_audio_spectrogram_transformer.py
@@ -55,7 +55,7 @@ class ASTModelTester:
        is_training=True,
        use_labels=True,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/bart/test_modeling_bart.py
+++ b/tests/models/bart/test_modeling_bart.py
@@ -1289,7 +1289,7 @@ class BartStandaloneDecoderModelTester:
        use_labels=True,
        decoder_start_token_id=2,
        decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
        encoder_attention_heads=4,
        decoder_attention_heads=4,
        max_position_embeddings=30,

--- a/tests/models/beit/test_modeling_beit.py
+++ b/tests/models/beit/test_modeling_beit.py
@@ -64,7 +64,7 @@ class BeitModelTester:
        is_training=True,
        use_labels=True,
        hidden_size=32,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/beit/test_modeling_flax_beit.py
+++ b/tests/models/beit/test_modeling_flax_beit.py
@@ -48,7 +48,7 @@ class FlaxBeitModelTester(unittest.TestCase):
        is_training=True,
        use_labels=True,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/bert/test_modeling_bert.py
+++ b/tests/models/bert/test_modeling_bert.py
@@ -57,7 +57,7 @@ class BertModelTester:
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/bert/test_modeling_flax_bert.py
+++ b/tests/models/bert/test_modeling_flax_bert.py
@@ -47,7 +47,7 @@ class FlaxBertModelTester(unittest.TestCase):
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/bert_generation/test_modeling_bert_generation.py
+++ b/tests/models/bert_generation/test_modeling_bert_generation.py
@@ -41,7 +41,7 @@ class BertGenerationEncoderTester:
        use_input_mask=True,
        vocab_size=99,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
+++ b/tests/models/bigbird_pegasus/test_modeling_bigbird_pegasus.py
@@ -605,7 +605,7 @@ class BigBirdPegasusStandaloneDecoderModelTester:
        use_labels=True,
        decoder_start_token_id=2,
        decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
        encoder_attention_heads=4,
        decoder_attention_heads=4,
        max_position_embeddings=30,

--- a/tests/models/biogpt/test_modeling_biogpt.py
+++ b/tests/models/biogpt/test_modeling_biogpt.py
@@ -51,7 +51,7 @@ class BioGptModelTester:
        use_labels=True,
        vocab_size=99,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/blenderbot/test_modeling_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_blenderbot.py
@@ -356,7 +356,7 @@ class BlenderbotStandaloneDecoderModelTester:
        use_labels=True,
        decoder_start_token_id=2,
        decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
        encoder_attention_heads=4,
        decoder_attention_heads=4,
        max_position_embeddings=30,

--- a/tests/models/blenderbot_small/test_modeling_blenderbot_small.py
+++ b/tests/models/blenderbot_small/test_modeling_blenderbot_small.py
@@ -365,7 +365,7 @@ class BlenderbotSmallStandaloneDecoderModelTester:
        use_labels=True,
        decoder_start_token_id=2,
        decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
        encoder_attention_heads=4,
        decoder_attention_heads=4,
        max_position_embeddings=30,

--- a/tests/models/blip/test_modeling_blip.py
+++ b/tests/models/blip/test_modeling_blip.py
@@ -70,7 +70,7 @@ class BlipVisionModelTester:
        is_training=True,
        hidden_size=32,
        projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        dropout=0.1,
@@ -221,7 +221,7 @@ class BlipTextModelTester:
        vocab_size=99,
        hidden_size=32,
        projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        dropout=0.1,

--- a/tests/models/blip/test_modeling_blip_text.py
+++ b/tests/models/blip/test_modeling_blip_text.py
@@ -44,7 +44,7 @@ class BlipTextModelTester:
        vocab_size=99,
        hidden_size=32,
        projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        dropout=0.1,

--- a/tests/models/blip_2/test_modeling_blip_2.py
+++ b/tests/models/blip_2/test_modeling_blip_2.py
@@ -62,7 +62,7 @@ class Blip2VisionModelTester:
        is_training=True,
        hidden_size=32,
        projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        dropout=0.1,
@@ -215,7 +215,7 @@ class Blip2QFormerModelTester:
        vocab_size=99,
        hidden_size=32,
        projection_dim=32,
-        num_hidden_layers=6,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        dropout=0.1,
@@ -289,7 +289,7 @@ class Blip2TextModelDecoderOnlyTester:
        use_labels=False,
        vocab_size=99,
        hidden_size=16,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=4,
        hidden_act="gelu",
@@ -503,7 +503,7 @@ class Blip2TextModelTester:
        use_attention_mask=True,
        use_labels=True,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        d_ff=37,
        relative_attention_num_buckets=8,

--- a/tests/models/bloom/test_modeling_bloom.py
+++ b/tests/models/bloom/test_modeling_bloom.py
@@ -54,7 +54,7 @@ class BloomModelTester:
        use_mc_token_ids=True,
        vocab_size=99,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",

--- a/tests/models/canine/test_modeling_canine.py
+++ b/tests/models/canine/test_modeling_canine.py
@@ -53,7 +53,7 @@ class CanineModelTester:
        # NOTE: this is not a model parameter, just an input
        vocab_size=100000,
        hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",