"docs/source/vscode:/vscode.git/clone" did not exist on "969534af4bf8049b674917c712dd9c1f9ae88242"
Unverified Commit 51d9c569 authored by Julien Plu, committed by GitHub

Fix embeddings resizing in TF models (#8657)

* Resize the biases at the same time as the embeddings

* Trigger CI

* Biases are not reset anymore

* Remove get_output_embeddings + better LM model detection in generation utils

* Apply style

* First test on BERT

* Update docstring + new name

* Apply the new resizing logic to all the models

* fix tests

* Apply style

* Update the template

* Fix naming

* Fix naming

* Apply style

* Apply style

* Remove unused import

* Revert get_output_embeddings

* Trigger CI

* Update num parameters

* Restore get_output_embeddings in TFPretrainedModel and add comments

* Style

* Add decoder resizing

* Style

* Fix tests

* Separate bias and decoder resize

* Fix tests

* Fix tests

* Apply style

* Add bias resizing in MPNet

* Trigger CI

* Apply style
parent 3552d0e0
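For context, here is a minimal usage sketch (not part of the diff; the checkpoint name is only illustrative) of the behaviour this change targets: resizing the token embeddings of a TF language-model head now resizes the tied output bias in the same call instead of resetting it, and the new accessors get_output_layer_with_bias() and get_prefix_bias_name(), exercised by the tests below, expose the layer that owns that bias and its variable prefix.

    import tensorflow as tf

    from transformers import TFBertForMaskedLM  # illustrative; any TF LM head behaves the same way

    model = TFBertForMaskedLM.from_pretrained("bert-base-uncased")

    # Grow the vocabulary by 10 tokens: the input embeddings and the MLM
    # output bias are both resized to the new vocabulary size.
    model.resize_token_embeddings(model.config.vocab_size + 10)

    # Accessors added by this PR: LM heads return the bias-owning layer and
    # a name prefix for the bias variable; non-LM models return None for both.
    output_layer = model.get_output_layer_with_bias()
    assert isinstance(output_layer, tf.keras.layers.Layer)
    bias_prefix = model.get_prefix_bias_name()
    assert isinstance(bias_prefix, str)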
@@ -18,17 +18,17 @@ import unittest

 from tests.test_configuration_common import ConfigTester
 from tests.test_modeling_tf_bart import TFBartModelTester
 from tests.test_modeling_tf_common import TFModelTesterMixin
-from transformers import (
-    BlenderbotConfig,
-    BlenderbotSmallTokenizer,
-    TFAutoModelForSeq2SeqLM,
-    TFBlenderbotForConditionalGeneration,
-    is_tf_available,
-)
+from transformers import BlenderbotConfig, BlenderbotSmallTokenizer, is_tf_available
 from transformers.file_utils import cached_property
 from transformers.testing_utils import is_pt_tf_cross_test, require_tf, require_tokenizers, slow


+if is_tf_available():
+    import tensorflow as tf
+
+    from transformers import TFAutoModelForSeq2SeqLM, TFBlenderbotForConditionalGeneration
+

 class TFBlenderbotModelTester(TFBartModelTester):
     config_updates = dict(
         normalize_before=True,
@@ -65,6 +65,17 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
         # Should be uncommented during patrick TF refactor
         pass

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+

 @is_pt_tf_cross_test
 @require_tokenizers
...
@@ -592,12 +592,26 @@ class TFModelTesterMixin:

     def test_model_common_attributes(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        list_lm_models = (
+            list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values())
+            + list(TF_MODEL_FOR_MASKED_LM_MAPPING.values())
+            + list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values())
+        )

         for model_class in self.all_model_classes:
             model = model_class(config)
             assert isinstance(model.get_input_embeddings(), (tf.keras.layers.Layer, TFAdaptiveEmbedding))
-            x = model.get_output_embeddings()
-            assert x is None or isinstance(x, tf.keras.layers.Layer)
+
+            if model_class in list_lm_models:
+                x = model.get_output_layer_with_bias()
+                assert isinstance(x, tf.keras.layers.Layer)
+                name = model.get_prefix_bias_name()
+                assert isinstance(name, str)
+            else:
+                x = model.get_output_layer_with_bias()
+                assert x is None
+                name = model.get_prefix_bias_name()
+                assert name is None

     def test_determinism(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
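The LM-model detection above relies on the TF auto-model task mappings rather than on get_output_embeddings(): a class counts as an LM head if it appears as a value in the causal-LM, masked-LM, or seq2seq-LM mapping. A small sketch of that check in isolation (assuming the top-level transformers package, which re-exports these mappings when TensorFlow is installed):

    from transformers import (
        TF_MODEL_FOR_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_MASKED_LM_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TFBertForMaskedLM,
        TFBertForSequenceClassification,
    )

    # Mapping values are TF model classes, keyed by their config class.
    lm_classes = (
        list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values())
        + list(TF_MODEL_FOR_MASKED_LM_MAPPING.values())
        + list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values())
    )

    assert TFBertForMaskedLM in lm_classes  # LM head: exposes an output bias layer
    assert TFBertForSequenceClassification not in lm_classes  # non-LM head: no bias layer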
@@ -353,6 +353,17 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs)

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+
     def test_gpt2_sequence_classification_model(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_gpt2_for_sequence_classification(*config_and_inputs)
...
@@ -678,6 +678,25 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
         extended_model = tf.keras.Model(inputs=[input_ids, visual_feats, visual_pos], outputs=[outputs])
         extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        list_lm_models = [TFLxmertForPreTraining]
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+
+            if model_class in list_lm_models:
+                x = model.get_output_layer_with_bias()
+                assert isinstance(x, tf.keras.layers.Layer)
+                name = model.get_prefix_bias_name()
+                assert isinstance(name, str)
+            else:
+                x = model.get_output_layer_with_bias()
+                assert x is None
+                name = model.get_prefix_bias_name()
+                assert name is None
+
     @slow
     def test_saved_model_with_hidden_states_output(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...
@@ -94,6 +94,17 @@ class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase):
         extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
         extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+

 class AbstractMarianIntegrationTest(unittest.TestCase):
     maxDiff = 1000  # show more chars for failing integration tests
...
@@ -93,6 +93,17 @@ class TestTFMBartCommon(TFModelTesterMixin, unittest.TestCase):
         extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
         extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+
     @is_pt_tf_cross_test
     @require_sentencepiece
...
@@ -283,6 +283,25 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_mobilebert_for_token_classification(*config_and_inputs)

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        list_lm_models = [TFMobileBertForMaskedLM, TFMobileBertForPreTraining]
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+
+            if model_class in list_lm_models:
+                x = model.get_output_layer_with_bias()
+                assert isinstance(x, tf.keras.layers.Layer)
+                name = model.get_prefix_bias_name()
+                assert isinstance(name, str)
+            else:
+                x = model.get_output_layer_with_bias()
+                assert x is None
+                name = model.get_prefix_bias_name()
+                assert name is None
+
     @slow
     def test_model_from_pretrained(self):
         # for model_name in TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
...
@@ -202,6 +202,17 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_openai_gpt_double_head(*config_and_inputs)

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+
     @slow
     def test_model_from_pretrained(self):
         for model_name in TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
...
@@ -99,6 +99,17 @@ class TestTFPegasusCommon(TFModelTesterMixin, unittest.TestCase):
         extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
         extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+
     @is_pt_tf_cross_test
     @require_sentencepiece
...
@@ -282,6 +282,17 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_t5_decoder_model_past_large_inputs(*config_and_inputs)

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+
     @slow
     def test_model_from_pretrained(self):
         model = TFT5Model.from_pretrained("t5-small")
...
@@ -163,6 +163,17 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_transfo_xl_lm_head(*config_and_inputs)

+    def test_model_common_attributes(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
+            x = model.get_output_layer_with_bias()
+            assert x is None
+            name = model.get_prefix_bias_name()
+            assert name is None
+
     @slow
     def test_model_from_pretrained(self):
         for model_name in TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
...