Unverified Commit 51d9c569 authored by Julien Plu, committed by GitHub

Fix embeddings resizing in TF models (#8657)

* Resize the biases at the same time as the embeddings

* Trigger CI

* Biases are not reset anymore

* Remove get_output_embeddings + better LM model detection in generation utils

* Apply style

* First test on BERT

* Update docstring + new name

* Apply the new resizing logic to all the models

* fix tests

* Apply style

* Update the template

* Fix naming

* Fix naming

* Apply style

* Apply style

* Remove unused import

* Revert get_output_embeddings

* Trigger CI

* Update num parameters

* Restore get_output_embeddings in TFPreTrainedModel and add comments

* Style

* Add decoder resizing

* Style

* Fix tests

* Separate bias and decoder resize

* Fix tests

* Fix tests

* Apply style

* Add bias resizing in MPNet

* Trigger CI

* Apply style
parent 3552d0e0
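As a quick illustration of the behavior this commit targets, here is a minimal usage sketch (not part of the diff below). The checkpoint and model class are an arbitrary illustrative choice; resize_token_embeddings, get_output_layer_with_bias and get_prefix_bias_name are the methods touched by this change:

from transformers import TFBertForMaskedLM

model = TFBertForMaskedLM.from_pretrained("bert-base-uncased")

# Resizing the input embeddings should now also resize the MLM bias (and the
# decoder weights, for models that keep a separate decoder), so the output
# layer stays consistent with the enlarged vocabulary.
model.resize_token_embeddings(model.config.vocab_size + 10)

# The two helpers introduced by this commit expose the output layer that
# carries the bias and the scope prefix used to name it; the common tests
# below check that LM heads return real values while other heads return None.
bias_layer = model.get_output_layer_with_bias()
prefix_name = model.get_prefix_bias_name()
print(type(bias_layer).__name__, prefix_name)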
@@ -18,17 +18,17 @@ import unittest
from tests.test_configuration_common import ConfigTester
from tests.test_modeling_tf_bart import TFBartModelTester
from tests.test_modeling_tf_common import TFModelTesterMixin
from transformers import (
    BlenderbotConfig,
    BlenderbotSmallTokenizer,
    TFAutoModelForSeq2SeqLM,
    TFBlenderbotForConditionalGeneration,
    is_tf_available,
)
from transformers import BlenderbotConfig, BlenderbotSmallTokenizer, is_tf_available
from transformers.file_utils import cached_property
from transformers.testing_utils import is_pt_tf_cross_test, require_tf, require_tokenizers, slow

if is_tf_available():
    import tensorflow as tf

    from transformers import TFAutoModelForSeq2SeqLM, TFBlenderbotForConditionalGeneration


class TFBlenderbotModelTester(TFBartModelTester):
    config_updates = dict(
        normalize_before=True,
@@ -65,6 +65,17 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
        # Should be uncommented during patrick TF refactor
        pass

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @is_pt_tf_cross_test
    @require_tokenizers
......
@@ -592,12 +592,26 @@ class TFModelTesterMixin:
    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        list_lm_models = (
            list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values())
            + list(TF_MODEL_FOR_MASKED_LM_MAPPING.values())
            + list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values())
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), (tf.keras.layers.Layer, TFAdaptiveEmbedding))

            x = model.get_output_embeddings()
            assert x is None or isinstance(x, tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_layer_with_bias()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_prefix_bias_name()
                assert isinstance(name, str)
            else:
                x = model.get_output_layer_with_bias()
                assert x is None
                name = model.get_prefix_bias_name()
                assert name is None

    def test_determinism(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
@@ -353,6 +353,17 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    def test_gpt2_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_sequence_classification(*config_and_inputs)
......
@@ -678,6 +678,25 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids, visual_feats, visual_pos], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        list_lm_models = [TFLxmertForPreTraining]

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_layer_with_bias()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_prefix_bias_name()
                assert isinstance(name, str)
            else:
                x = model.get_output_layer_with_bias()
                assert x is None
                name = model.get_prefix_bias_name()
                assert name is None

    @slow
    def test_saved_model_with_hidden_states_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
@@ -94,6 +94,17 @@ class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None


class AbstractMarianIntegrationTest(unittest.TestCase):
    maxDiff = 1000  # show more chars for failing integration tests
......
@@ -93,6 +93,17 @@ class TestTFMBartCommon(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @is_pt_tf_cross_test
    @require_sentencepiece
......
@@ -283,6 +283,17 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_token_classification(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        list_lm_models = [TFMobileBertForMaskedLM, TFMobileBertForPreTraining]

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_layer_with_bias()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_prefix_bias_name()
                assert isinstance(name, str)
            else:
                x = model.get_output_layer_with_bias()
                assert x is None
                name = model.get_prefix_bias_name()
                assert name is None

    @slow
    def test_model_from_pretrained(self):
        # for model_name in TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
@@ -202,6 +202,17 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_double_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
@@ -99,6 +99,17 @@ class TestTFPegasusCommon(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @is_pt_tf_cross_test
    @require_sentencepiece
......
@@ -282,6 +282,17 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_decoder_model_past_large_inputs(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @slow
    def test_model_from_pretrained(self):
        model = TFT5Model.from_pretrained("t5-small")
......
@@ -163,6 +163,17 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_transfo_xl_lm_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......