Unverified commit bd90cda9 authored by Yih-Dar, committed by GitHub

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
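The change below is mechanical: every `ModelTester` drops its default depth to `num_hidden_layers=2` (or `decoder_layers=2`), presumably the smallest stack that still exercises layer-to-layer wiring while cutting the forward-pass cost on CI. A minimal sketch of the effect, using `BertConfig`/`BertModel` as a stand-in for the testers' own model classes (the timing harness and shapes are illustrative, not from the diff):

```python
# Illustrative only: BertConfig/BertModel stand in for the many testers below.
import time

import torch
from transformers import BertConfig, BertModel


def tiny_model(num_hidden_layers: int) -> BertModel:
    # Same style of tiny defaults the testers use (hidden_size=32, 4 heads, ...).
    config = BertConfig(
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=4,
        intermediate_size=37,
    )
    return BertModel(config)


input_ids = torch.randint(0, 99, (13, 7))  # batch_size=13, seq_length=7

for layers in (5, 2):
    model = tiny_model(layers).eval()
    start = time.perf_counter()
    with torch.no_grad():
        out = model(input_ids)
    elapsed = time.perf_counter() - start
    print(f"{layers} layers: {elapsed:.4f}s, output shape {tuple(out.last_hidden_state.shape)}")
```

Two layers rather than one means some layer still consumes another layer's output instead of only the embeddings, so stacking bugs can still surface.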
@@ -45,7 +45,7 @@ class GPTSanJapaneseTester:
         is_training=True,
         hidden_size=32,
         ext_size=42,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_ext_layers=2,
         num_attention_heads=4,
         num_experts=2,
@@ -356,7 +356,7 @@ class GroupViTTextModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -553,6 +553,10 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
     def test_model_common_attributes(self):
         pass
 
+    # overwritten from parent as this equivalent test needs a specific `seed` and hard to get a good one!
+    def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=2e-5, name="outputs", attributes=None):
+        super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol=tol, name=name, attributes=attributes)
+
     @is_pt_tf_cross_test
     def test_pt_tf_model_equivalence(self):
         import tensorflow as tf
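The second GroupViT hunk above overrides `check_pt_tf_outputs` only to pin `tol=2e-5`. The inherited check boils down to an elementwise comparison of the PyTorch and TensorFlow outputs within that tolerance; a hedged NumPy sketch of the idea (the arrays are illustrative, not the real test fixtures):

```python
# Rough sketch of the tolerance comparison behind check_pt_tf_outputs;
# pt_out/tf_out are made-up arrays, not outputs of the real models.
import numpy as np


def outputs_close(pt_out: np.ndarray, tf_out: np.ndarray, tol: float = 2e-5) -> bool:
    # The equivalence test passes when the max absolute difference stays under tol.
    return float(np.max(np.abs(pt_out - tf_out))) <= tol


rng = np.random.default_rng(0)
pt_out = rng.standard_normal((2, 4)).astype(np.float32)
tf_out = pt_out + np.float32(1e-6)  # simulate tiny cross-framework drift
assert outputs_close(pt_out, tf_out)
```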
@@ -71,7 +71,7 @@ class HubertModelTester:
         conv_bias=False,
         num_conv_pos_embeddings=16,
         num_conv_pos_embedding_groups=2,
-        num_hidden_layers=4,
+        num_hidden_layers=2,
         num_attention_heads=2,
         hidden_dropout_prob=0.1,  # this is most likely not correctly set yet
         intermediate_size=20,
@@ -62,7 +62,7 @@ class IBertModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -65,7 +65,7 @@ class ImageGPTModelTester:
         use_mc_token_ids=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -64,7 +64,7 @@ class InstructBlipVisionModelTester:
         is_training=True,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -219,7 +219,7 @@ class InstructBlipQFormerModelTester:
         vocab_size=99,
         hidden_size=32,
         projection_dim=32,
-        num_hidden_layers=6,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         dropout=0.1,
@@ -295,7 +295,7 @@ class InstructBlipTextModelDecoderOnlyTester:
         use_labels=False,
         vocab_size=99,
         hidden_size=16,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=4,
         hidden_act="gelu",
@@ -48,7 +48,7 @@ class LayoutLMModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -55,7 +55,7 @@ class LayoutLMv2ModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=36,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -63,7 +63,7 @@ class LayoutLMv3ModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=36,
-        num_hidden_layers=3,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -46,7 +46,7 @@ class LlamaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -50,7 +50,7 @@ class LongformerModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -71,7 +71,7 @@ class FlaxLongT5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -59,7 +59,7 @@ class LongT5ModelTester:
         use_attention_mask=True,
         use_labels=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -916,7 +916,7 @@ class LongT5EncoderOnlyModelTester:
         # For common tests
         use_attention_mask=True,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         d_ff=37,
         relative_attention_num_buckets=8,
@@ -61,7 +61,7 @@ class LukeModelTester:
         entity_vocab_size=10,
         entity_emb_size=6,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -661,7 +661,7 @@ class MarianStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,
@@ -53,7 +53,7 @@ class MarkupLMModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -491,7 +491,7 @@ class MBartStandaloneDecoderModelTester:
         use_labels=True,
         decoder_start_token_id=2,
         decoder_ffn_dim=32,
-        decoder_layers=4,
+        decoder_layers=2,
         encoder_attention_heads=4,
         decoder_attention_heads=4,
         max_position_embeddings=30,
@@ -51,7 +51,7 @@ class MegaModelTester:
         use_labels=True,
         vocab_size=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         intermediate_size=37,
         hidden_dropout_prob=0.1,
         attention_probs_dropout_prob=0.1,
@@ -58,7 +58,7 @@ class MegatronBertModelTester:
         vocab_size=99,
         hidden_size=64,
         embedding_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         intermediate_size=37,
         hidden_act="gelu",
@@ -55,7 +55,7 @@ class MgpstrModelTester:
         num_bpe_labels=99,
         num_wordpiece_labels=99,
         hidden_size=32,
-        num_hidden_layers=5,
+        num_hidden_layers=2,
         num_attention_heads=4,
         mlp_ratio=4.0,
         patch_embeds_hidden_size=257,
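For context on why editing these keyword defaults is enough: each `*ModelTester.__init__` stores them, and a `get_config()` method turns them into the config object that every test in that file builds models from, so shrinking one default shrinks every test. A hedged sketch of the pattern with a hypothetical `ToyModelTester` (not a class from this diff):

```python
# Hypothetical ToyModelTester illustrating the tester pattern; BertConfig
# stands in for whichever config class the real tester targets.
from transformers import BertConfig


class ToyModelTester:
    def __init__(self, parent, batch_size=13, seq_length=7, vocab_size=99,
                 hidden_size=32, num_hidden_layers=2, num_attention_heads=4,
                 intermediate_size=37):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size

    def get_config(self):
        # Every test built on this tester now runs on a 2-layer model.
        return BertConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
        )
```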