Unverified Commit 134caef3 authored by Matt's avatar Matt Committed by GitHub
Browse files

Speed up TF tests by reducing hidden layer counts (#24595)

* hidden layers, huh, what are they good for (absolutely nothing)

* Some tests break with 1 hidden layer, use 2

* Use 1 hidden layer in a few slow models

* Use num_hidden_layers=2 everywhere

* Slightly higher tol for groupvit

* Slightly higher tol for groupvit
parent 3441ad7d
...@@ -51,7 +51,7 @@ class TFGPTJModelTester: ...@@ -51,7 +51,7 @@ class TFGPTJModelTester:
self.vocab_size = 99 self.vocab_size = 99
self.hidden_size = 32 self.hidden_size = 32
self.rotary_dim = 4 self.rotary_dim = 4
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.intermediate_size = 37 self.intermediate_size = 37
self.hidden_act = "gelu" self.hidden_act = "gelu"
......
...@@ -150,6 +150,10 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -150,6 +150,10 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase):
test_head_masking = False test_head_masking = False
test_onnx = False test_onnx = False
def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-4, name="outputs", attributes=None):
# We override with a slightly higher tol value, as this model tends to diverge a bit more
super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes)
def setUp(self): def setUp(self):
self.model_tester = TFGroupViTVisionModelTester(self) self.model_tester = TFGroupViTVisionModelTester(self)
self.config_tester = ConfigTester( self.config_tester = ConfigTester(
...@@ -381,7 +385,7 @@ class TFGroupViTTextModelTester: ...@@ -381,7 +385,7 @@ class TFGroupViTTextModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
dropout=0.1, dropout=0.1,
...@@ -459,6 +463,10 @@ class TFGroupViTTextModelTest(TFModelTesterMixin, unittest.TestCase): ...@@ -459,6 +463,10 @@ class TFGroupViTTextModelTest(TFModelTesterMixin, unittest.TestCase):
test_head_masking = False test_head_masking = False
test_onnx = False test_onnx = False
def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-4, name="outputs", attributes=None):
# We override with a slightly higher tol value, as this model tends to diverge a bit more
super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes)
def setUp(self): def setUp(self):
self.model_tester = TFGroupViTTextModelTester(self) self.model_tester = TFGroupViTTextModelTester(self)
self.config_tester = ConfigTester(self, config_class=GroupViTTextConfig, hidden_size=37) self.config_tester = ConfigTester(self, config_class=GroupViTTextConfig, hidden_size=37)
...@@ -581,6 +589,10 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test ...@@ -581,6 +589,10 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
test_attention_outputs = False test_attention_outputs = False
test_onnx = False test_onnx = False
def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-4, name="outputs", attributes=None):
# We override with a slightly higher tol value, as this model tends to diverge a bit more
super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes)
def setUp(self): def setUp(self):
self.model_tester = TFGroupViTModelTester(self) self.model_tester = TFGroupViTModelTester(self)
......
...@@ -59,7 +59,7 @@ class TFHubertModelTester: ...@@ -59,7 +59,7 @@ class TFHubertModelTester:
conv_bias=False, conv_bias=False,
num_conv_pos_embeddings=16, num_conv_pos_embeddings=16,
num_conv_pos_embedding_groups=2, num_conv_pos_embedding_groups=2,
num_hidden_layers=4, num_hidden_layers=2,
num_attention_heads=2, num_attention_heads=2,
hidden_dropout_prob=0.1, # this is most likely not correctly set yet hidden_dropout_prob=0.1, # this is most likely not correctly set yet
intermediate_size=20, intermediate_size=20,
......
...@@ -52,7 +52,7 @@ class TFLayoutLMModelTester: ...@@ -52,7 +52,7 @@ class TFLayoutLMModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -69,7 +69,7 @@ class TFLayoutLMv3ModelTester: ...@@ -69,7 +69,7 @@ class TFLayoutLMv3ModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=36, hidden_size=36,
num_hidden_layers=3, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -47,7 +47,7 @@ class TFLEDModelTester: ...@@ -47,7 +47,7 @@ class TFLEDModelTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
......
...@@ -56,7 +56,7 @@ class TFLongformerModelTester: ...@@ -56,7 +56,7 @@ class TFLongformerModelTester:
self.use_labels = True self.use_labels = True
self.vocab_size = 99 self.vocab_size = 99
self.hidden_size = 32 self.hidden_size = 32
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.intermediate_size = 37 self.intermediate_size = 37
self.hidden_act = "gelu" self.hidden_act = "gelu"
......
...@@ -49,7 +49,7 @@ class TFMarianModelTester: ...@@ -49,7 +49,7 @@ class TFMarianModelTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
......
...@@ -47,7 +47,7 @@ class TFMBartModelTester: ...@@ -47,7 +47,7 @@ class TFMBartModelTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
......
...@@ -97,7 +97,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te ...@@ -97,7 +97,7 @@ class TFMobileBertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
embedding_size=32, embedding_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -51,7 +51,7 @@ class TFMPNetModelTester: ...@@ -51,7 +51,7 @@ class TFMPNetModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=64, hidden_size=64,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=64, intermediate_size=64,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -53,7 +53,7 @@ class TFOpenAIGPTModelTester: ...@@ -53,7 +53,7 @@ class TFOpenAIGPTModelTester:
self.use_mc_token_ids = True self.use_mc_token_ids = True
self.vocab_size = 99 self.vocab_size = 99
self.hidden_size = 32 self.hidden_size = 32
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.intermediate_size = 37 self.intermediate_size = 37
self.hidden_act = "gelu" self.hidden_act = "gelu"
......
...@@ -47,7 +47,7 @@ class TFPegasusModelTester: ...@@ -47,7 +47,7 @@ class TFPegasusModelTester:
use_labels=False, use_labels=False,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
......
...@@ -54,7 +54,7 @@ class TFRemBertModelTester: ...@@ -54,7 +54,7 @@ class TFRemBertModelTester:
hidden_size=32, hidden_size=32,
input_embedding_size=18, input_embedding_size=18,
output_embedding_size=43, output_embedding_size=43,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
...@@ -79,7 +79,7 @@ class TFRemBertModelTester: ...@@ -79,7 +79,7 @@ class TFRemBertModelTester:
self.hidden_size = 32 self.hidden_size = 32
self.input_embedding_size = input_embedding_size self.input_embedding_size = input_embedding_size
self.output_embedding_size = output_embedding_size self.output_embedding_size = output_embedding_size
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.intermediate_size = 37 self.intermediate_size = 37
self.hidden_act = "gelu" self.hidden_act = "gelu"
......
...@@ -56,7 +56,7 @@ class TFRobertaModelTester: ...@@ -56,7 +56,7 @@ class TFRobertaModelTester:
self.use_labels = True self.use_labels = True
self.vocab_size = 99 self.vocab_size = 99
self.hidden_size = 32 self.hidden_size = 32
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.intermediate_size = 37 self.intermediate_size = 37
self.hidden_act = "gelu" self.hidden_act = "gelu"
......
...@@ -57,7 +57,7 @@ class TFRobertaPreLayerNormModelTester: ...@@ -57,7 +57,7 @@ class TFRobertaPreLayerNormModelTester:
self.use_labels = True self.use_labels = True
self.vocab_size = 99 self.vocab_size = 99
self.hidden_size = 32 self.hidden_size = 32
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.intermediate_size = 37 self.intermediate_size = 37
self.hidden_act = "gelu" self.hidden_act = "gelu"
......
...@@ -56,7 +56,7 @@ class TFRoFormerModelTester: ...@@ -56,7 +56,7 @@ class TFRoFormerModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
...@@ -79,7 +79,7 @@ class TFRoFormerModelTester: ...@@ -79,7 +79,7 @@ class TFRoFormerModelTester:
self.use_labels = True self.use_labels = True
self.vocab_size = 99 self.vocab_size = 99
self.hidden_size = 32 self.hidden_size = 32
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.intermediate_size = 37 self.intermediate_size = 37
self.hidden_act = "gelu" self.hidden_act = "gelu"
......
...@@ -46,7 +46,7 @@ class TFT5ModelTester: ...@@ -46,7 +46,7 @@ class TFT5ModelTester:
self.vocab_size = 99 self.vocab_size = 99
self.n_positions = 14 self.n_positions = 14
self.hidden_size = 32 self.hidden_size = 32
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.num_attention_heads = 4 self.num_attention_heads = 4
self.d_ff = 37 self.d_ff = 37
self.relative_attention_num_buckets = 8 self.relative_attention_num_buckets = 8
...@@ -325,7 +325,7 @@ class TFT5EncoderOnlyModelTester: ...@@ -325,7 +325,7 @@ class TFT5EncoderOnlyModelTester:
# For common tests # For common tests
use_attention_mask=True, use_attention_mask=True,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
d_ff=37, d_ff=37,
relative_attention_num_buckets=8, relative_attention_num_buckets=8,
......
...@@ -77,7 +77,7 @@ class TFTapasModelTester: ...@@ -77,7 +77,7 @@ class TFTapasModelTester:
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=2,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
......
...@@ -59,7 +59,7 @@ class TFTransfoXLModelTester: ...@@ -59,7 +59,7 @@ class TFTransfoXLModelTester:
self.d_head = 8 self.d_head = 8
self.d_inner = 128 self.d_inner = 128
self.div_val = 2 self.div_val = 2
self.num_hidden_layers = 5 self.num_hidden_layers = 2
self.scope = None self.scope = None
self.seed = 1 self.seed = 1
self.eos_token_id = 0 self.eos_token_id = 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment