Unverified commit b99098ab, authored by Lysandre Debut, committed by GitHub

Patch models (#6326)

* TFAlbertFor{TokenClassification, MultipleChoice}

* Patch models

* BERT and TF BERT info

* Update check_repo
parent 6028ed92
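
The recurring change in the BERT files below swaps hard `assert` statements on `config.is_decoder` for informational log messages, so building a model with an unexpected configuration no longer raises. A minimal sketch of the pattern; `DummyConfig` is a hypothetical stand-in for a transformers `PretrainedConfig`:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    class DummyConfig:
        is_decoder = False  # hypothetical stand-in for a PretrainedConfig attribute

    config = DummyConfig()

    # Before the patch, a misconfiguration aborted construction:
    #     assert config.is_decoder, "... add `is_decoder=True`."
    # After the patch, construction proceeds and the user is informed:
    if not config.is_decoder:
        logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`.")
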
@@ -933,7 +933,9 @@ class BertForPreTraining(BertPreTrainedModel):
 class BertLMHeadModel(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`."
+        if not config.is_decoder:
+            logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
@@ -1036,9 +1038,12 @@ class BertLMHeadModel(BertPreTrainedModel):
 class BertForMaskedLM(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
...
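
With the asserts relaxed, opting into decoder behaviour is purely config-driven. A usage sketch, assuming a standard pretrained checkpoint name:

    from transformers import BertConfig, BertLMHeadModel

    # is_decoder=True enables causal masking; leaving it False now only
    # triggers the informational log message added above, instead of raising.
    config = BertConfig.from_pretrained("bert-base-uncased", is_decoder=True)
    model = BertLMHeadModel.from_pretrained("bert-base-uncased", config=config)
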
@@ -860,9 +860,12 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
 class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
@@ -936,7 +939,9 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
 class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert config.is_decoder, "If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`"
+        if not config.is_decoder:
+            logger.info("If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
...
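
The TF side mirrors the PyTorch change exactly; the equivalent sketch, under the same assumptions as above:

    from transformers import BertConfig, TFBertLMHeadModel

    config = BertConfig.from_pretrained("bert-base-uncased", is_decoder=True)
    model = TFBertLMHeadModel.from_pretrained("bert-base-uncased", config=config)
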
@@ -32,6 +32,7 @@ if is_tf_available():
         TFAlbertForMultipleChoice,
         TFAlbertForSequenceClassification,
         TFAlbertForQuestionAnswering,
+        TFAlbertForTokenClassification,
         TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
     )
@@ -109,6 +110,7 @@ class TFAlbertModelTester:
         config = AlbertConfig(
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
+            embedding_size=self.embedding_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
             intermediate_size=self.intermediate_size,
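
ALBERT factorizes its embeddings, so `embedding_size` (E) is a hyperparameter distinct from `hidden_size` (H), and the tester has to pass it explicitly rather than inherit the config default. A sketch with illustrative small values, in the spirit of the tester's other attributes:

    from transformers import AlbertConfig

    config = AlbertConfig(
        vocab_size=99,
        embedding_size=16,  # E: factorized embedding dimension
        hidden_size=32,     # H: transformer hidden dimension, projected up from E
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
    )
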
@@ -198,6 +200,19 @@ class TFAlbertModelTester:
         result = model(inputs)
         self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
 
+    def create_and_check_albert_for_token_classification(
+        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+    ):
+        config.num_labels = self.num_labels
+        model = TFAlbertForTokenClassification(config=config)
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
+            "token_type_ids": token_type_ids,
+        }
+        result = model(inputs)
+        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
+
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
         (
@@ -223,6 +238,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
             TFAlbertForMaskedLM,
             TFAlbertForSequenceClassification,
             TFAlbertForQuestionAnswering,
+            TFAlbertForTokenClassification,
+            TFAlbertForMultipleChoice,
         )
         if is_tf_available()
         else ()
...
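
For the new checker to run, the test class conventionally calls it from a `test_*` method; a sketch of the usual transformers wiring (the method name is assumed, as it is not shown in this diff):

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_token_classification(*config_and_inputs)
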
@@ -265,6 +265,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         (
             TFBertModel,
             TFBertForMaskedLM,
+            TFBertLMHeadModel,
             TFBertForNextSentencePrediction,
             TFBertForPreTraining,
             TFBertForQuestionAnswering,
...
@@ -202,6 +202,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
             TFElectraForTokenClassification,
             TFElectraForMultipleChoice,
             TFElectraForSequenceClassification,
+            TFElectraForQuestionAnswering,
         )
         if is_tf_available()
         else ()
...
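
Listing a class in `all_model_classes` is what opts it into every shared test in `TFModelTesterMixin`; each common test loops over the tuple roughly along these lines (a simplified sketch, not the mixin's actual code):

    def test_forward_pass(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            model = model_class(config)  # every listed class is built ...
            model(inputs_dict)           # ... and run on the shared inputs
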
@@ -18,12 +18,6 @@ IGNORE_NON_TESTED = [
     "DPRSpanPredictor",  # Building part of bigger (tested) model.
     "ReformerForMaskedLM",  # Needs to be setup as decoder.
     "T5Stack",  # Building part of bigger (tested) model.
-    "TFAlbertForMultipleChoice",  # TODO: fix
-    "TFAlbertForTokenClassification",  # TODO: fix
-    "TFBertLMHeadModel",  # TODO: fix
-    "TFElectraForMultipleChoice",  # Fix is in #6284
-    "TFElectraForQuestionAnswering",  # TODO: fix
-    "TFElectraForSequenceClassification",  # Fix is in #6284
     "TFElectraMainLayer",  # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
     "TFRobertaForMultipleChoice",  # TODO: fix
 ]
...
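
check_repo.py flags any model class that no tester exercises, and `IGNORE_NON_TESTED` is its escape hatch; the six entries removed here are now covered by the test changes above, so they can leave the list. The check amounts to something like this sketch (`all_model_names` and `tested_model_names` are illustrative, not the script's actual identifiers):

    failures = [
        name
        for name in all_model_names
        if name not in tested_model_names and name not in IGNORE_NON_TESTED
    ]
    if failures:
        raise Exception(f"Models defined but not tested and not in the ignore list: {failures}")
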