Unverified Commit 51d9c569 authored by Julien Plu, committed by GitHub

Fix embeddings resizing in TF models (#8657)

* Resize the biases at the same time as the embeddings

* Trigger CI

* Biases are not reset anymore

* Remove get_output_embeddings + better LM model detection in generation utils

* Apply style

* First test on BERT

* Update docstring + new name

* Apply the new resizing logic to all the models

* fix tests

* Apply style

* Update the template

* Fix naming

* Fix naming

* Apply style

* Apply style

* Remove unused import

* Revert get_output_embeddings

* Trigger CI

* Update num parameters

* Restore get_output_embeddings in TFPreTrainedModel and add comments

* Style

* Add decoder resizing

* Style

* Fix tests

* Separate bias and decoder resize

* Fix tests

* Fix tests

* Apply style

* Add bias resizing in MPNet

* Trigger CI

* Apply style
parent 3552d0e0
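As a quick illustration of the behavior this commit targets, here is a minimal usage sketch (not part of the diff below). The checkpoint and model class are an arbitrary illustrative choice; resize_token_embeddings, get_output_layer_with_bias and get_prefix_bias_name are the methods touched by this change:

from transformers import TFBertForMaskedLM

model = TFBertForMaskedLM.from_pretrained("bert-base-uncased")

# Resizing the input embeddings should now also resize the MLM bias (and the
# decoder weights, for models that keep a separate decoder), so the output
# layer stays consistent with the enlarged vocabulary.
model.resize_token_embeddings(model.config.vocab_size + 10)

# The two helpers introduced by this commit expose the output layer that
# carries the bias and the scope prefix used to name it; the common tests
# below check that LM heads return real values while other heads return None.
bias_layer = model.get_output_layer_with_bias()
prefix_name = model.get_prefix_bias_name()
print(type(bias_layer).__name__, prefix_name)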
@@ -18,17 +18,17 @@ import unittest
from tests.test_configuration_common import ConfigTester
from tests.test_modeling_tf_bart import TFBartModelTester
from tests.test_modeling_tf_common import TFModelTesterMixin
from transformers import (
    BlenderbotConfig,
    BlenderbotSmallTokenizer,
    TFAutoModelForSeq2SeqLM,
    TFBlenderbotForConditionalGeneration,
    is_tf_available,
)
from transformers import BlenderbotConfig, BlenderbotSmallTokenizer, is_tf_available
from transformers.file_utils import cached_property
from transformers.testing_utils import is_pt_tf_cross_test, require_tf, require_tokenizers, slow

if is_tf_available():
    import tensorflow as tf

    from transformers import TFAutoModelForSeq2SeqLM, TFBlenderbotForConditionalGeneration


class TFBlenderbotModelTester(TFBartModelTester):
    config_updates = dict(
        normalize_before=True,
@@ -65,6 +65,17 @@ class TFBlenderbotModelTest(TFModelTesterMixin, unittest.TestCase):
        # Should be uncommented during patrick TF refactor
        pass

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @is_pt_tf_cross_test
    @require_tokenizers
......
@@ -592,12 +592,26 @@ class TFModelTesterMixin:
    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        list_lm_models = (
            list(TF_MODEL_FOR_CAUSAL_LM_MAPPING.values())
            + list(TF_MODEL_FOR_MASKED_LM_MAPPING.values())
            + list(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values())
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), (tf.keras.layers.Layer, TFAdaptiveEmbedding))

            x = model.get_output_embeddings()
            assert x is None or isinstance(x, tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_layer_with_bias()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_prefix_bias_name()
                assert isinstance(name, str)
            else:
                x = model.get_output_layer_with_bias()
                assert x is None
                name = model.get_prefix_bias_name()
                assert name is None

    def test_determinism(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
@@ -353,6 +353,17 @@ class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_double_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    def test_gpt2_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_sequence_classification(*config_and_inputs)
......
@@ -678,6 +678,25 @@ class TFLxmertModelTest(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids, visual_feats, visual_pos], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        list_lm_models = [TFLxmertForPreTraining]

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_layer_with_bias()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_prefix_bias_name()
                assert isinstance(name, str)
            else:
                x = model.get_output_layer_with_bias()
                assert x is None
                name = model.get_prefix_bias_name()
                assert name is None

    @slow
    def test_saved_model_with_hidden_states_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
......
@@ -94,6 +94,17 @@ class TestTFMarianCommon(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None


class AbstractMarianIntegrationTest(unittest.TestCase):
    maxDiff = 1000  # show more chars for failing integration tests
......
@@ -93,6 +93,17 @@ class TestTFMBartCommon(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @is_pt_tf_cross_test
    @require_sentencepiece
......
@@ -283,6 +283,17 @@ class TFMobileBertModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_mobilebert_for_token_classification(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        list_lm_models = [TFMobileBertForMaskedLM, TFMobileBertForPreTraining]

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)

            if model_class in list_lm_models:
                x = model.get_output_layer_with_bias()
                assert isinstance(x, tf.keras.layers.Layer)
                name = model.get_prefix_bias_name()
                assert isinstance(name, str)
            else:
                x = model.get_output_layer_with_bias()
                assert x is None
                name = model.get_prefix_bias_name()
                assert name is None

    @slow
    def test_model_from_pretrained(self):
        # for model_name in TF_MOBILEBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
@@ -202,6 +202,17 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_openai_gpt_double_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
@@ -99,6 +99,17 @@ class TestTFPegasusCommon(TFModelTesterMixin, unittest.TestCase):
        extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
        extended_model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @is_pt_tf_cross_test
    @require_sentencepiece
......
@@ -282,6 +282,17 @@ class TFT5ModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_decoder_model_past_large_inputs(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @slow
    def test_model_from_pretrained(self):
        model = TFT5Model.from_pretrained("t5-small")
......
@@ -163,6 +163,17 @@ class TFTransfoXLModelTest(TFModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_transfo_xl_lm_head(*config_and_inputs)

    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
            x = model.get_output_layer_with_bias()
            assert x is None
            name = model.get_prefix_bias_name()
            assert name is None

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......