Unverified commit b99098ab, authored by Lysandre Debut, committed by GitHub

Patch models (#6326)

* TFAlbertFor{TokenClassification, MultipleChoice}

* Patch models

* BERT and TF BERT info

* Update check_repo
parent 6028ed92
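
The recurring change in the BERT files below swaps hard `assert` statements on `config.is_decoder` for informational log messages, so building a model with an unexpected configuration no longer raises. A minimal sketch of the pattern; `DummyConfig` is a hypothetical stand-in for a transformers `PretrainedConfig`:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    class DummyConfig:
        is_decoder = False  # hypothetical stand-in for a PretrainedConfig attribute

    config = DummyConfig()

    # Before the patch, a misconfiguration aborted construction:
    #     assert config.is_decoder, "... add `is_decoder=True`."
    # After the patch, construction proceeds and the user is informed:
    if not config.is_decoder:
        logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`.")
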
@@ -933,7 +933,9 @@ class BertForPreTraining(BertPreTrainedModel):
 class BertLMHeadModel(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert config.is_decoder, "If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`."
+        if not config.is_decoder:
+            logger.info("If you want to use `BertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
@@ -1036,9 +1038,12 @@ class BertLMHeadModel(BertPreTrainedModel):
 class BertForMaskedLM(BertPreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = BertModel(config)
         self.cls = BertOnlyMLMHead(config)
...
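
With the asserts relaxed, opting into decoder behaviour is purely config-driven. A usage sketch, assuming a standard pretrained checkpoint name:

    from transformers import BertConfig, BertLMHeadModel

    # is_decoder=True enables causal masking; leaving it False now only
    # triggers the informational log message added above, instead of raising.
    config = BertConfig.from_pretrained("bert-base-uncased", is_decoder=True)
    model = BertLMHeadModel.from_pretrained("bert-base-uncased", config=config)
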
@@ -860,9 +860,12 @@ class TFBertForPreTraining(TFBertPreTrainedModel):
 class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert (
-            not config.is_decoder
-        ), "If you want to use `BertForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
+        if config.is_decoder:
+            logger.info(
+                "If you want to use `TFBertForMaskedLM` make sure `config.is_decoder=False` for "
+                "bi-directional self-attention."
+            )
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
@@ -936,7 +939,9 @@ class TFBertForMaskedLM(TFBertPreTrainedModel, TFMaskedLanguageModelingLoss):
 class TFBertLMHeadModel(TFBertPreTrainedModel, TFCausalLanguageModelingLoss):
     def __init__(self, config, *inputs, **kwargs):
         super().__init__(config, *inputs, **kwargs)
-        assert config.is_decoder, "If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True.`"
+        if not config.is_decoder:
+            logger.info("If you want to use `TFBertLMHeadModel` as a standalone, add `is_decoder=True`.")
 
         self.bert = TFBertMainLayer(config, name="bert")
         self.mlm = TFBertMLMHead(config, self.bert.embeddings, name="mlm___cls")
...
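
The TF side mirrors the PyTorch change exactly; the equivalent sketch, under the same assumptions as above:

    from transformers import BertConfig, TFBertLMHeadModel

    config = BertConfig.from_pretrained("bert-base-uncased", is_decoder=True)
    model = TFBertLMHeadModel.from_pretrained("bert-base-uncased", config=config)
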
@@ -32,6 +32,7 @@ if is_tf_available():
         TFAlbertForMultipleChoice,
         TFAlbertForSequenceClassification,
         TFAlbertForQuestionAnswering,
+        TFAlbertForTokenClassification,
         TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
     )
@@ -109,6 +110,7 @@ class TFAlbertModelTester:
         config = AlbertConfig(
             vocab_size=self.vocab_size,
             hidden_size=self.hidden_size,
+            embedding_size=self.embedding_size,
             num_hidden_layers=self.num_hidden_layers,
             num_attention_heads=self.num_attention_heads,
             intermediate_size=self.intermediate_size,
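
ALBERT factorizes its embeddings, so `embedding_size` (E) is a hyperparameter distinct from `hidden_size` (H), and the tester has to pass it explicitly rather than inherit the config default. A sketch with illustrative small values, in the spirit of the tester's other attributes:

    from transformers import AlbertConfig

    config = AlbertConfig(
        vocab_size=99,
        embedding_size=16,  # E: factorized embedding dimension
        hidden_size=32,     # H: transformer hidden dimension, projected up from E
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
    )
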
@@ -198,6 +200,19 @@ class TFAlbertModelTester:
         result = model(inputs)
         self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
 
+    def create_and_check_albert_for_token_classification(
+        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
+    ):
+        config.num_labels = self.num_labels
+        model = TFAlbertForTokenClassification(config=config)
+        inputs = {
+            "input_ids": input_ids,
+            "attention_mask": input_mask,
+            "token_type_ids": token_type_ids,
+        }
+        result = model(inputs)
+        self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels])
+
     def prepare_config_and_inputs_for_common(self):
         config_and_inputs = self.prepare_config_and_inputs()
         (
@@ -223,6 +238,8 @@ class TFAlbertModelTest(TFModelTesterMixin, unittest.TestCase):
             TFAlbertForMaskedLM,
             TFAlbertForSequenceClassification,
             TFAlbertForQuestionAnswering,
+            TFAlbertForTokenClassification,
+            TFAlbertForMultipleChoice,
         )
         if is_tf_available()
         else ()
...
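
For the new checker to run, the test class conventionally calls it from a `test_*` method; a sketch of the usual transformers wiring (the method name is assumed, as it is not shown in this diff):

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_token_classification(*config_and_inputs)
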
@@ -265,6 +265,7 @@ class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
         (
             TFBertModel,
             TFBertForMaskedLM,
+            TFBertLMHeadModel,
             TFBertForNextSentencePrediction,
             TFBertForPreTraining,
             TFBertForQuestionAnswering,
...
@@ -202,6 +202,7 @@ class TFElectraModelTest(TFModelTesterMixin, unittest.TestCase):
             TFElectraForTokenClassification,
             TFElectraForMultipleChoice,
             TFElectraForSequenceClassification,
+            TFElectraForQuestionAnswering,
         )
         if is_tf_available()
         else ()
...
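
Listing a class in `all_model_classes` is what opts it into every shared test in `TFModelTesterMixin`; each common test loops over the tuple roughly along these lines (a simplified sketch, not the mixin's actual code):

    def test_forward_pass(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            model = model_class(config)  # every listed class is built ...
            model(inputs_dict)           # ... and run on the shared inputs
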
@@ -18,12 +18,6 @@ IGNORE_NON_TESTED = [
     "DPRSpanPredictor",  # Building part of bigger (tested) model.
     "ReformerForMaskedLM",  # Needs to be setup as decoder.
     "T5Stack",  # Building part of bigger (tested) model.
-    "TFAlbertForMultipleChoice",  # TODO: fix
-    "TFAlbertForTokenClassification",  # TODO: fix
-    "TFBertLMHeadModel",  # TODO: fix
-    "TFElectraForMultipleChoice",  # Fix is in #6284
-    "TFElectraForQuestionAnswering",  # TODO: fix
-    "TFElectraForSequenceClassification",  # Fix is in #6284
     "TFElectraMainLayer",  # Building part of bigger (tested) model (should it be a TFPreTrainedModel ?)
     "TFRobertaForMultipleChoice",  # TODO: fix
 ]
...
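
check_repo.py flags any model class that no tester exercises, and `IGNORE_NON_TESTED` is its escape hatch; the six entries removed here are now covered by the test changes above, so they can leave the list. The check amounts to something like this sketch (`all_model_names` and `tested_model_names` are illustrative, not the script's actual identifiers):

    failures = [
        name
        for name in all_model_names
        if name not in tested_model_names and name not in IGNORE_NON_TESTED
    ]
    if failures:
        raise Exception(f"Models defined but not tested and not in the ignore list: {failures}")
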