Unverified Commit bd90cda9 authored by Yih-Dar, committed by GitHub
Browse files

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
......@@ -54,7 +54,7 @@ class MobileBertModelTester:
vocab_size=99,
hidden_size=64,
embedding_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -49,7 +49,7 @@ class MPNetModelTester:
use_labels=True,
vocab_size=99,
hidden_size=64,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=64,
hidden_act="gelu",
......
......@@ -54,7 +54,7 @@ class MptModelTester:
use_mc_token_ids=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -51,7 +51,7 @@ class MraModelTester:
use_labels=True,
vocab_size=99,
hidden_size=16,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=2,
intermediate_size=36,
hidden_act="gelu",
......
......@@ -595,7 +595,7 @@ class MvpStandaloneDecoderModelTester:
use_labels=True,
decoder_start_token_id=2,
decoder_ffn_dim=32,
decoder_layers=4,
decoder_layers=2,
encoder_attention_heads=4,
decoder_attention_heads=4,
max_position_embeddings=30,
......
......@@ -55,7 +55,7 @@ class NezhaModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -52,7 +52,7 @@ class NllbMoeModelTester:
use_labels=False,
vocab_size=99,
hidden_size=16,
num_hidden_layers=4,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=4,
hidden_act="relu",
......
......@@ -51,7 +51,7 @@ class NystromformerModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -49,7 +49,7 @@ class OpenAIGPTModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -70,7 +70,7 @@ class OPTModelTester:
use_labels=False,
vocab_size=99,
hidden_size=16,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=4,
hidden_act="gelu",
......
......@@ -62,7 +62,7 @@ class OwlViTVisionModelTester:
num_channels=3,
is_training=True,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
dropout=0.1,
......
......@@ -52,7 +52,7 @@ class FlaxPegasusModelTester:
use_labels=False,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_dropout_prob=0.1,
......
......@@ -371,7 +371,7 @@ class PegasusStandaloneDecoderModelTester:
use_labels=True,
decoder_start_token_id=2,
decoder_ffn_dim=32,
decoder_layers=4,
decoder_layers=2,
encoder_attention_heads=4,
decoder_attention_heads=4,
max_position_embeddings=30,
......
......@@ -670,7 +670,7 @@ class PegasusXStandaloneDecoderModelTester:
use_labels=True,
decoder_start_token_id=2,
decoder_ffn_dim=32,
decoder_layers=4,
decoder_layers=2,
encoder_attention_heads=4,
decoder_attention_heads=4,
max_position_embeddings=30,
......
......@@ -71,7 +71,7 @@ class Pix2StructVisionModelTester:
patch_embed_hidden_size=12,
projection_dim=32,
max_patches=64,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
dropout=0.1,
......@@ -230,7 +230,7 @@ class Pix2StructTextModelTester:
vocab_size=99,
hidden_size=12,
projection_dim=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
dropout=0.1,
......
......@@ -473,7 +473,7 @@ class PLBartStandaloneDecoderModelTester:
use_labels=True,
decoder_start_token_id=2,
decoder_ffn_dim=32,
decoder_layers=4,
decoder_layers=2,
encoder_attention_heads=4,
decoder_attention_heads=4,
max_position_embeddings=30,
......
......@@ -55,10 +55,10 @@ class ProphetNetModelTester:
use_labels=True,
decoder_start_token_id=0,
encoder_ffn_dim=32,
num_encoder_layers=4,
num_encoder_layers=2,
num_encoder_attention_heads=4,
decoder_ffn_dim=32,
num_decoder_layers=4,
num_decoder_layers=2,
num_decoder_attention_heads=4,
max_position_embeddings=30,
is_encoder_decoder=True,
......@@ -437,10 +437,10 @@ class ProphetNetModelTester:
decoder_attention_mask=decoder_attention_mask,
labels=lm_labels,
)
self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(4.5981, device=torch_device), atol=1e-3))
self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(4.5892, device=torch_device), atol=1e-3))
expected_logit_slice = torch.tensor(
[-0.0648, 0.0790, 0.0360, 0.0089, 0.0039, -0.0639, 0.0131], device=torch_device
[-0.0184, 0.0758, -0.0543, -0.0093, 0.0050, -0.0660, -0.1453], device=torch_device
)
self.parent.assertTrue(torch.allclose(result.logits[0, :, 1], expected_logit_slice, atol=1e-3))
......@@ -551,10 +551,10 @@ class ProphetNetStandaloneDecoderModelTester:
use_labels=True,
decoder_start_token_id=0,
encoder_ffn_dim=32,
num_encoder_layers=4,
num_encoder_layers=2,
num_encoder_attention_heads=4,
decoder_ffn_dim=32,
num_decoder_layers=4,
num_decoder_layers=2,
num_decoder_attention_heads=4,
max_position_embeddings=30,
is_encoder_decoder=False,
......@@ -782,10 +782,10 @@ class ProphetNetStandaloneEncoderModelTester:
use_labels=True,
decoder_start_token_id=0,
encoder_ffn_dim=32,
num_encoder_layers=4,
num_encoder_layers=2,
num_encoder_attention_heads=4,
decoder_ffn_dim=32,
num_decoder_layers=4,
num_decoder_layers=2,
num_decoder_attention_heads=4,
max_position_embeddings=30,
is_encoder_decoder=False,
......
......@@ -54,7 +54,7 @@ class QDQBertModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -54,7 +54,7 @@ class RealmModelTester:
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
......@@ -55,7 +55,7 @@ class RemBertModelTester:
hidden_size=32,
input_embedding_size=18,
output_embedding_size=43,
num_hidden_layers=5,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment