"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "d3f24dfad7446ea3a23c2fa53e791b9ed7f8c7b7"
Unverified Commit bd90cda9, authored by Yih-Dar and committed by GitHub
Browse files

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
...@@ -54,8 +54,9 @@ class AlbertModelTester: ...@@ -54,8 +54,9 @@ class AlbertModelTester:
vocab_size=99, vocab_size=99,
embedding_size=16, embedding_size=16,
hidden_size=36, hidden_size=36,
num_hidden_layers=6, num_hidden_layers=2,
num_hidden_groups=6, # this needs to be the same as `num_hidden_layers`!
num_hidden_groups=2,
num_attention_heads=6, num_attention_heads=6,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -48,7 +48,7 @@ class FlaxAlbertModelTester(unittest.TestCase): ...@@ -48,7 +48,7 @@ class FlaxAlbertModelTester(unittest.TestCase):
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -242,7 +242,7 @@ class AlignTextModelTester: ...@@ -242,7 +242,7 @@ class AlignTextModelTester:
use_token_type_ids=True, use_token_type_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -60,7 +60,7 @@ class AltCLIPVisionModelTester: ...@@ -60,7 +60,7 @@ class AltCLIPVisionModelTester:
is_training=True, is_training=True,
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
...@@ -212,7 +212,7 @@ class AltCLIPTextModelTester: ...@@ -212,7 +212,7 @@ class AltCLIPTextModelTester:
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
project_dim=32, project_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
......
...@@ -55,7 +55,7 @@ class ASTModelTester: ...@@ -55,7 +55,7 @@ class ASTModelTester:
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -1289,7 +1289,7 @@ class BartStandaloneDecoderModelTester: ...@@ -1289,7 +1289,7 @@ class BartStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -64,7 +64,7 @@ class BeitModelTester: ...@@ -64,7 +64,7 @@ class BeitModelTester:
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -48,7 +48,7 @@ class FlaxBeitModelTester(unittest.TestCase): ...@@ -48,7 +48,7 @@ class FlaxBeitModelTester(unittest.TestCase):
is_training=True, is_training=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -57,7 +57,7 @@ class BertModelTester: ...@@ -57,7 +57,7 @@ class BertModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -47,7 +47,7 @@ class FlaxBertModelTester(unittest.TestCase): ...@@ -47,7 +47,7 @@ class FlaxBertModelTester(unittest.TestCase):
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -41,7 +41,7 @@ class BertGenerationEncoderTester: ...@@ -41,7 +41,7 @@ class BertGenerationEncoderTester:
use_input_mask=True, use_input_mask=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -605,7 +605,7 @@ class BigBirdPegasusStandaloneDecoderModelTester: ...@@ -605,7 +605,7 @@ class BigBirdPegasusStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -51,7 +51,7 @@ class BioGptModelTester: ...@@ -51,7 +51,7 @@ class BioGptModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -356,7 +356,7 @@ class BlenderbotStandaloneDecoderModelTester: ...@@ -356,7 +356,7 @@ class BlenderbotStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -365,7 +365,7 @@ class BlenderbotSmallStandaloneDecoderModelTester: ...@@ -365,7 +365,7 @@ class BlenderbotSmallStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -70,7 +70,7 @@ class BlipVisionModelTester: ...@@ -70,7 +70,7 @@ class BlipVisionModelTester:
is_training=True, is_training=True,
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
...@@ -221,7 +221,7 @@ class BlipTextModelTester: ...@@ -221,7 +221,7 @@ class BlipTextModelTester:
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
......
...@@ -44,7 +44,7 @@ class BlipTextModelTester: ...@@ -44,7 +44,7 @@ class BlipTextModelTester:
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
......
...@@ -62,7 +62,7 @@ class Blip2VisionModelTester: ...@@ -62,7 +62,7 @@ class Blip2VisionModelTester:
is_training=True, is_training=True,
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
...@@ -215,7 +215,7 @@ class Blip2QFormerModelTester: ...@@ -215,7 +215,7 @@ class Blip2QFormerModelTester:
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
projection_dim=32, projection_dim=32,
num_hidden_layers=6, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
...@@ -289,7 +289,7 @@ class Blip2TextModelDecoderOnlyTester: ...@@ -289,7 +289,7 @@ class Blip2TextModelDecoderOnlyTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=16, hidden_size=16,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=4, intermediate_size=4,
hidden_act="gelu", hidden_act="gelu",
...@@ -503,7 +503,7 @@ class Blip2TextModelTester: ...@@ -503,7 +503,7 @@ class Blip2TextModelTester:
use_attention_mask=True, use_attention_mask=True,
use_labels=True, use_labels=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
d_ff=37, d_ff=37,
relative_attention_num_buckets=8, relative_attention_num_buckets=8,
......
...@@ -54,7 +54,7 @@ class BloomModelTester: ...@@ -54,7 +54,7 @@ class BloomModelTester:
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -53,7 +53,7 @@ class CanineModelTester: ...@@ -53,7 +53,7 @@ class CanineModelTester:
# NOTE: this is not a model parameter, just an input # NOTE: this is not a model parameter, just an input
vocab_size=100000, vocab_size=100000,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment