Unverified commit bd90cda9, authored by Yih-Dar, committed by GitHub
Browse files

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
...@@ -54,7 +54,7 @@ class ElectraModelTester: ...@@ -54,7 +54,7 @@ class ElectraModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -34,7 +34,7 @@ class FlaxElectraModelTester(unittest.TestCase): ...@@ -34,7 +34,7 @@ class FlaxElectraModelTester(unittest.TestCase):
vocab_size=99, vocab_size=99,
embedding_size=24, embedding_size=24,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -56,7 +56,7 @@ class ErnieModelTester: ...@@ -56,7 +56,7 @@ class ErnieModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -50,7 +50,7 @@ class ErnieMModelTester: ...@@ -50,7 +50,7 @@ class ErnieMModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -49,7 +49,7 @@ class EsmModelTester: ...@@ -49,7 +49,7 @@ class EsmModelTester:
use_labels=True, use_labels=True,
vocab_size=33, vocab_size=33,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -43,7 +43,7 @@ class EsmFoldModelTester: ...@@ -43,7 +43,7 @@ class EsmFoldModelTester:
use_labels=False, use_labels=False,
vocab_size=19, vocab_size=19,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -50,7 +50,7 @@ class FalconModelTester: ...@@ -50,7 +50,7 @@ class FalconModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -57,7 +57,7 @@ class FlaubertModelTester(object): ...@@ -57,7 +57,7 @@ class FlaubertModelTester(object):
vocab_size=99, vocab_size=99,
n_special=0, n_special=0,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
......
...@@ -79,7 +79,7 @@ class FlavaImageModelTester: ...@@ -79,7 +79,7 @@ class FlavaImageModelTester:
parent, parent,
batch_size=12, batch_size=12,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
...@@ -342,7 +342,7 @@ class FlavaTextModelTester: ...@@ -342,7 +342,7 @@ class FlavaTextModelTester:
max_position_embeddings=512, max_position_embeddings=512,
position_embedding_type="absolute", position_embedding_type="absolute",
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
...@@ -487,7 +487,7 @@ class FlavaMultimodalModelTester: ...@@ -487,7 +487,7 @@ class FlavaMultimodalModelTester:
seq_length=44, seq_length=44,
use_input_mask=True, use_input_mask=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -70,7 +70,7 @@ class FNetModelTester: ...@@ -70,7 +70,7 @@ class FNetModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
......
...@@ -51,7 +51,7 @@ class GitVisionModelTester: ...@@ -51,7 +51,7 @@ class GitVisionModelTester:
is_training=True, is_training=True,
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
...@@ -203,7 +203,7 @@ class GitModelTester: ...@@ -203,7 +203,7 @@ class GitModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -52,7 +52,7 @@ class FlaxGPT2ModelTester: ...@@ -52,7 +52,7 @@ class FlaxGPT2ModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -56,7 +56,7 @@ class GPT2ModelTester: ...@@ -56,7 +56,7 @@ class GPT2ModelTester:
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -55,7 +55,7 @@ class GPTBigCodeModelTester: ...@@ -55,7 +55,7 @@ class GPTBigCodeModelTester:
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="relu", hidden_act="relu",
......
...@@ -52,9 +52,9 @@ class FlaxGPTNeoModelTester: ...@@ -52,9 +52,9 @@ class FlaxGPTNeoModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
attention_types=[[["global", "local"], 2]], attention_types=[[["global", "local"], 1]],
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
......
...@@ -54,8 +54,8 @@ class GPTNeoModelTester: ...@@ -54,8 +54,8 @@ class GPTNeoModelTester:
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=4, num_hidden_layers=2,
attention_types=[[["global", "local"], 2]], attention_types=[[["global", "local"], 1]],
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -52,7 +52,7 @@ class GPTNeoXModelTester: ...@@ -52,7 +52,7 @@ class GPTNeoXModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=64, hidden_size=64,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -44,7 +44,7 @@ class GPTNeoXJapaneseModelTester: ...@@ -44,7 +44,7 @@ class GPTNeoXJapaneseModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_multiple_size=4, intermediate_multiple_size=4,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -53,7 +53,7 @@ class FlaxGPTJModelTester: ...@@ -53,7 +53,7 @@ class FlaxGPTJModelTester:
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
rotary_dim=4, rotary_dim=4,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -56,7 +56,7 @@ class GPTJModelTester: ...@@ -56,7 +56,7 @@ class GPTJModelTester:
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
rotary_dim=4, rotary_dim=4,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment