"vscode:/vscode.git/clone" did not exist on "41dad89f706ac49313963d33cc51486473d7e493"
Unverified Commit bd90cda9 authored by Yih-Dar, committed by GitHub
Browse files

CI with `num_hidden_layers=2` 🚀🚀🚀 (#25266)



* CI with layers=2

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent b28ebb26
...@@ -54,7 +54,7 @@ class MobileBertModelTester: ...@@ -54,7 +54,7 @@ class MobileBertModelTester:
vocab_size=99, vocab_size=99,
hidden_size=64, hidden_size=64,
embedding_size=32, embedding_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -49,7 +49,7 @@ class MPNetModelTester: ...@@ -49,7 +49,7 @@ class MPNetModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=64, hidden_size=64,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=64, intermediate_size=64,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -54,7 +54,7 @@ class MptModelTester: ...@@ -54,7 +54,7 @@ class MptModelTester:
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -51,7 +51,7 @@ class MraModelTester: ...@@ -51,7 +51,7 @@ class MraModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=16, hidden_size=16,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
intermediate_size=36, intermediate_size=36,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -595,7 +595,7 @@ class MvpStandaloneDecoderModelTester: ...@@ -595,7 +595,7 @@ class MvpStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -55,7 +55,7 @@ class NezhaModelTester: ...@@ -55,7 +55,7 @@ class NezhaModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -52,7 +52,7 @@ class NllbMoeModelTester: ...@@ -52,7 +52,7 @@ class NllbMoeModelTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=16, hidden_size=16,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=4, intermediate_size=4,
hidden_act="relu", hidden_act="relu",
......
...@@ -51,7 +51,7 @@ class NystromformerModelTester: ...@@ -51,7 +51,7 @@ class NystromformerModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -49,7 +49,7 @@ class OpenAIGPTModelTester: ...@@ -49,7 +49,7 @@ class OpenAIGPTModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -70,7 +70,7 @@ class OPTModelTester: ...@@ -70,7 +70,7 @@ class OPTModelTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=16, hidden_size=16,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=4, intermediate_size=4,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -62,7 +62,7 @@ class OwlViTVisionModelTester: ...@@ -62,7 +62,7 @@ class OwlViTVisionModelTester:
num_channels=3, num_channels=3,
is_training=True, is_training=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
......
...@@ -52,7 +52,7 @@ class FlaxPegasusModelTester: ...@@ -52,7 +52,7 @@ class FlaxPegasusModelTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
......
...@@ -371,7 +371,7 @@ class PegasusStandaloneDecoderModelTester: ...@@ -371,7 +371,7 @@ class PegasusStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -670,7 +670,7 @@ class PegasusXStandaloneDecoderModelTester: ...@@ -670,7 +670,7 @@ class PegasusXStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -71,7 +71,7 @@ class Pix2StructVisionModelTester: ...@@ -71,7 +71,7 @@ class Pix2StructVisionModelTester:
patch_embed_hidden_size=12, patch_embed_hidden_size=12,
projection_dim=32, projection_dim=32,
max_patches=64, max_patches=64,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
...@@ -230,7 +230,7 @@ class Pix2StructTextModelTester: ...@@ -230,7 +230,7 @@ class Pix2StructTextModelTester:
vocab_size=99, vocab_size=99,
hidden_size=12, hidden_size=12,
projection_dim=32, projection_dim=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
......
...@@ -473,7 +473,7 @@ class PLBartStandaloneDecoderModelTester: ...@@ -473,7 +473,7 @@ class PLBartStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=2, decoder_start_token_id=2,
decoder_ffn_dim=32, decoder_ffn_dim=32,
decoder_layers=4, decoder_layers=2,
encoder_attention_heads=4, encoder_attention_heads=4,
decoder_attention_heads=4, decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
......
...@@ -55,10 +55,10 @@ class ProphetNetModelTester: ...@@ -55,10 +55,10 @@ class ProphetNetModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=0, decoder_start_token_id=0,
encoder_ffn_dim=32, encoder_ffn_dim=32,
num_encoder_layers=4, num_encoder_layers=2,
num_encoder_attention_heads=4, num_encoder_attention_heads=4,
decoder_ffn_dim=32, decoder_ffn_dim=32,
num_decoder_layers=4, num_decoder_layers=2,
num_decoder_attention_heads=4, num_decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
is_encoder_decoder=True, is_encoder_decoder=True,
...@@ -437,10 +437,10 @@ class ProphetNetModelTester: ...@@ -437,10 +437,10 @@ class ProphetNetModelTester:
decoder_attention_mask=decoder_attention_mask, decoder_attention_mask=decoder_attention_mask,
labels=lm_labels, labels=lm_labels,
) )
self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(4.5981, device=torch_device), atol=1e-3)) self.parent.assertTrue(torch.allclose(result.loss, torch.tensor(4.5892, device=torch_device), atol=1e-3))
expected_logit_slice = torch.tensor( expected_logit_slice = torch.tensor(
[-0.0648, 0.0790, 0.0360, 0.0089, 0.0039, -0.0639, 0.0131], device=torch_device [-0.0184, 0.0758, -0.0543, -0.0093, 0.0050, -0.0660, -0.1453], device=torch_device
) )
self.parent.assertTrue(torch.allclose(result.logits[0, :, 1], expected_logit_slice, atol=1e-3)) self.parent.assertTrue(torch.allclose(result.logits[0, :, 1], expected_logit_slice, atol=1e-3))
...@@ -551,10 +551,10 @@ class ProphetNetStandaloneDecoderModelTester: ...@@ -551,10 +551,10 @@ class ProphetNetStandaloneDecoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=0, decoder_start_token_id=0,
encoder_ffn_dim=32, encoder_ffn_dim=32,
num_encoder_layers=4, num_encoder_layers=2,
num_encoder_attention_heads=4, num_encoder_attention_heads=4,
decoder_ffn_dim=32, decoder_ffn_dim=32,
num_decoder_layers=4, num_decoder_layers=2,
num_decoder_attention_heads=4, num_decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
is_encoder_decoder=False, is_encoder_decoder=False,
...@@ -782,10 +782,10 @@ class ProphetNetStandaloneEncoderModelTester: ...@@ -782,10 +782,10 @@ class ProphetNetStandaloneEncoderModelTester:
use_labels=True, use_labels=True,
decoder_start_token_id=0, decoder_start_token_id=0,
encoder_ffn_dim=32, encoder_ffn_dim=32,
num_encoder_layers=4, num_encoder_layers=2,
num_encoder_attention_heads=4, num_encoder_attention_heads=4,
decoder_ffn_dim=32, decoder_ffn_dim=32,
num_decoder_layers=4, num_decoder_layers=2,
num_decoder_attention_heads=4, num_decoder_attention_heads=4,
max_position_embeddings=30, max_position_embeddings=30,
is_encoder_decoder=False, is_encoder_decoder=False,
......
...@@ -54,7 +54,7 @@ class QDQBertModelTester: ...@@ -54,7 +54,7 @@ class QDQBertModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -54,7 +54,7 @@ class RealmModelTester: ...@@ -54,7 +54,7 @@ class RealmModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -55,7 +55,7 @@ class RemBertModelTester: ...@@ -55,7 +55,7 @@ class RemBertModelTester:
hidden_size=32, hidden_size=32,
input_embedding_size=18, input_embedding_size=18,
output_embedding_size=43, output_embedding_size=43,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment