"vscode:/vscode.git/clone" did not exist on "1815d1865e61df3be1cbe37ee8b78f6017b0b1e2"
Unverified Commit bd90cda9 authored by Yih-Dar, committed by GitHub

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
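The hunks below all make the same kind of change: each model tester's default config is shrunk so CI instantiates a 2-layer model instead of a 3-to-6-layer one. As a rough, hypothetical sketch of the pattern (ToyModelTester and the BertConfig values here are illustrative only, not one of the files touched by this commit), a tester just wires such keyword defaults into a tiny config, builds a randomly initialized model, and checks its output shape:

```python
# Hypothetical sketch of the model-tester pattern used in these test files.
# Cutting num_hidden_layers shrinks the randomly initialized test model, so each
# CI test builds and runs faster while still exercising a multi-layer stack.
import torch
from transformers import BertConfig, BertModel


class ToyModelTester:
    def __init__(
        self,
        batch_size=2,
        seq_length=7,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=2,  # the value this commit standardizes on
        num_attention_heads=4,
        intermediate_size=37,
    ):
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size

    def get_config(self):
        # Tiny config built from the tester's defaults.
        return BertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
        )

    def create_and_check_model(self):
        config = self.get_config()
        model = BertModel(config).eval()
        input_ids = torch.randint(0, self.vocab_size, (self.batch_size, self.seq_length))
        with torch.no_grad():
            outputs = model(input_ids)
        assert outputs.last_hidden_state.shape == (
            self.batch_size,
            self.seq_length,
            self.hidden_size,
        )


if __name__ == "__main__":
    ToyModelTester().create_and_check_model()
```

Fewer hidden layers means fewer transformer blocks to initialize and run per test, which is where the CI speedup comes from.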
@@ -45,7 +45,7 @@ class GPTSanJapaneseTester:
         is_training=True,
         hidden_size=32,
         ext_size=42,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_ext_layers=2,
         num_attention_heads=4,
         num_experts=2,
...
@@ -356,7 +356,7 @@ class GroupViTTextModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -553,6 +553,10 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
     def test_model_common_attributes(self):
         pass
 
+    # overwritten from parent as this equivalent test needs a specific `seed` and hard to get a good one!
+    def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=2e-5, name="outputs", attributes=None):
+        super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol=tol, name=name, attributes=attributes)
+
     @is_pt_tf_cross_test
     def test_pt_tf_model_equivalence(self):
         import tensorflow as tf
...
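The GroupViT hunk above also adds a check_pt_tf_outputs override that pins the PT/TF equivalence tolerance to 2e-5. As a simplified, hypothetical sketch (the real helper in the transformers test mixin is recursive over nested model outputs; check_outputs_close below is an illustrative name, not the library's API), the comparison amounts to an elementwise max-difference check between the two frameworks' tensors:

```python
# Simplified sketch of a tolerance-based PT/TF output comparison; only the
# plain-tensor case is covered here.
import numpy as np
import tensorflow as tf
import torch


def check_outputs_close(tf_output, pt_output, tol=2e-5, name="outputs"):
    # Move both outputs to NumPy and compare the largest absolute difference
    # against the tolerance.
    tf_np = tf_output.numpy()
    pt_np = pt_output.detach().cpu().numpy()
    max_diff = np.max(np.abs(tf_np - pt_np))
    assert max_diff <= tol, f"{name}: max difference {max_diff} exceeds tolerance {tol}"


# Usage sketch with dummy tensors standing in for model outputs:
check_outputs_close(tf.constant([[0.1, 0.2]]), torch.tensor([[0.1, 0.2]]))
```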
@@ -71,7 +71,7 @@ class HubertModelTester:
         conv_bias=False,
         num_conv_pos_embeddings=16,
         num_conv_pos_embedding_groups=2,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         hidden_dropout_prob=0.1,  # this is most likely not correctly set yet
         intermediate_size=20,
...
@@ -62,7 +62,7 @@ class IBertModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -65,7 +65,7 @@ class ImageGPTModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -64,7 +64,7 @@ class InstructBlipVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -219,7 +219,7 @@ class InstructBlipQFormerModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=6,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -295,7 +295,7 @@ class InstructBlipTextModelDecoderOnlyTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=4,
         hidden_act="gelu",
...
@@ -48,7 +48,7 @@ class LayoutLMModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -55,7 +55,7 @@ class LayoutLMv2ModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=36,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -63,7 +63,7 @@ class LayoutLMv3ModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=36,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -46,7 +46,7 @@ class LlamaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -50,7 +50,7 @@ class LongformerModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -71,7 +71,7 @@ class FlaxLongT5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
...
@@ -59,7 +59,7 @@ class LongT5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -916,7 +916,7 @@ class LongT5EncoderOnlyModelTester:
         # For common tests
         use_attention_mask=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
...
@@ -61,7 +61,7 @@ class LukeModelTester:
         entity_vocab_size=10,
         entity_emb_size=6,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -661,7 +661,7 @@ class MarianStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,
...
@@ -53,7 +53,7 @@ class MarkupLMModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -491,7 +491,7 @@ class MBartStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,
...
@@ -51,7 +51,7 @@ class MegaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         intermediate_size=37,
         hidden_dropout_prob=0.1,
         attention_probs_dropout_prob=0.1,
...
@@ -58,7 +58,7 @@ class MegatronBertModelTester:
         vocab_size=99,
         hidden_size=64,
         embedding_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
...
@@ -55,7 +55,7 @@ class MgpstrModelTester:
         num_bpe_labels=99,
         num_wordpiece_labels=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         mlp_ratio=4.0,
         patch_embeds_hidden_size=257,
...