Unverified Commit 4dc65591 authored by Patrick von Platen, committed by GitHub

[Almost all TF models] TF clean up: add missing CLM / MLM loss; fix T5 naming and keras compile (#5395)

* add first version of clm tf

* make style

* add more tests for bert

* update tf clm loss

* fix tests

* correct tf ner script

* add mlm loss

* delete bogus file

* clean tf auto model + add tests

* finish adding clm loss everywhere

* fix training in distilbert

* fix flake8

* save intermediate

* fix tf t5 naming

* remove prints

* finish up

* up

* fix tf gpt2

* fix new test utils import

* fix flake8

* keep backward compatibility

* Update src/transformers/modeling_tf_albert.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_auto.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_electra.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_roberta.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_mobilebert.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_auto.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_bert.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/modeling_tf_distilbert.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* apply sylvains suggestions
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 33e43edd
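
The heart of the new CLM loss is label shifting: in a causal LM the logits at position i predict the token at position i + 1, so one label per sequence has no matching prediction. Below is a minimal sketch of such a loss in TF — illustrative only, not the exact helper the PR adds (it omits details such as masking positions labeled -100):

```python
import tensorflow as tf

def causal_lm_loss(labels, logits):
    # logits: (batch, seq_len, vocab_size); labels: (batch, seq_len).
    # Shift so that the logits at position i are scored against the token
    # at position i + 1; this drops one prediction/label pair per sequence.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction=tf.keras.losses.Reduction.NONE
    )
    shifted_logits = logits[:, :-1]  # no label exists for the last position
    shifted_labels = labels[:, 1:]   # nothing predicts the first token
    return loss_fn(shifted_labels, shifted_logits)  # shape (batch, seq_len - 1)
```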
tests/test_modeling_tf_common.py
@@ -38,6 +38,9 @@ if is_tf_available():
TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
TF_MODEL_FOR_CAUSAL_LM_MAPPING,
TF_MODEL_FOR_MASKED_LM_MAPPING,
TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
)
if _tf_gpu_memory_limit is not None:
@@ -93,6 +96,12 @@ class TFModelTesterMixin:
inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size)
elif model_class in TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values():
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
elif model_class in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values():
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
elif model_class in TF_MODEL_FOR_MASKED_LM_MAPPING.values():
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
elif model_class in TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values():
inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, self.model_tester.seq_length))
return inputs_dict
def test_initialization(self):
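
With dummy labels injected here, the shared tests exercise the new loss path for every LM-head model: when labels are passed, the loss comes back as the first output. A hedged usage sketch — the checkpoint is illustrative, and the `labels` argument assumes the post-PR call signatures:

```python
from transformers import BertTokenizer, TFBertForMaskedLM

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = TFBertForMaskedLM.from_pretrained("bert-base-uncased")

encoded = tokenizer("The capital of France is Paris.", return_tensors="tf")
# With labels present, the first element of the output tuple is the MLM loss.
outputs = model(encoded["input_ids"], labels=encoded["input_ids"])
loss, prediction_scores = outputs[0], outputs[1]
```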
@@ -291,7 +300,7 @@ class TFModelTesterMixin:
"decoder_input_ids": tf.keras.Input(
batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"
),
"inputs": tf.keras.Input(batch_shape=(2, 2000), name="inputs", dtype="int32"),
"input_ids": tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32"),
}
elif model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
input_ids = tf.keras.Input(batch_shape=(4, 2, 2000), name="input_ids", dtype="int32")
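
The rename matters for Keras compilation: symbolic `tf.keras.Input` tensors are matched to model arguments by name, so T5's nonstandard "inputs" keyword broke the shared compile test. A sketch of the pattern the fixed test exercises — the config, shapes, and compile arguments are illustrative:

```python
import tensorflow as tf
from transformers import T5Config, TFT5Model

config = T5Config()  # illustrative; the test uses a small tester config
model = TFT5Model(config)

# The symbolic tensors must carry the model's real argument names.
input_ids = tf.keras.Input(batch_shape=(2, 128), name="input_ids", dtype="int32")
decoder_input_ids = tf.keras.Input(batch_shape=(2, 128), name="decoder_input_ids", dtype="int32")

outputs = model({"input_ids": input_ids, "decoder_input_ids": decoder_input_ids})
keras_model = tf.keras.Model(inputs=[input_ids, decoder_input_ids], outputs=outputs[0])
keras_model.compile(optimizer="adam", loss="mse")  # loss choice is illustrative
```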
@@ -325,7 +334,7 @@ class TFModelTesterMixin:
outputs_dict = model(self._prepare_for_class(inputs_dict, model_class))
inputs_keywords = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
input_ids = inputs_keywords.pop("input_ids" if not self.is_encoder_decoder else "inputs", None,)
input_ids = inputs_keywords.pop("input_ids", None)
outputs_keywords = model(input_ids, **inputs_keywords)
output_dict = outputs_dict[0].numpy()
output_keywords = outputs_keywords[0].numpy()
@@ -479,9 +488,9 @@ class TFModelTesterMixin:
input_ids = inputs["input_ids"]
del inputs["input_ids"]
else:
encoder_input_ids = inputs["inputs"]
encoder_input_ids = inputs["input_ids"]
decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
del inputs["inputs"]
del inputs["input_ids"]
inputs.pop("decoder_input_ids", None)
wte = model.get_input_embeddings()
@@ -596,9 +605,15 @@ class TFModelTesterMixin:
added_label = prepared_for_class[list(prepared_for_class.keys() - inputs_dict.keys())[0]]
loss_size = tf.size(added_label)
if model.__class__ in TF_MODEL_FOR_CAUSAL_LM_MAPPING.values():
# if the loss is the causal LM loss, the labels are shifted,
# so that one label per batch is cut
loss_size = loss_size - self.model_tester.batch_size
# Test that the model correctly computes the loss with kwargs
prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
input_ids = prepared_for_class.pop("input_ids")
loss = model(input_ids, **prepared_for_class)[0]
self.assertEqual(loss.shape, [loss_size])
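
The loss_size adjustment above is just the shift made concrete: with per-token reduction, a causal LM yields seq_length - 1 loss terms per example instead of seq_length, so the total shrinks by exactly batch_size. A quick check with illustrative sizes:

```python
batch_size, seq_length = 13, 7  # illustrative sizes

full_label_count = batch_size * seq_length      # one label per token: 91
clm_loss_terms = batch_size * (seq_length - 1)  # shifting cuts one per sequence: 78
assert clm_loss_terms == full_label_count - batch_size
```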
tests/test_modeling_tf_distilbert.py
@@ -17,7 +17,7 @@
import unittest
from transformers import DistilBertConfig, is_tf_available
from transformers.testing_utils import require_tf
from transformers.testing_utils import require_tf, slow
from .test_configuration_common import ConfigTester
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
@@ -32,6 +32,7 @@ if is_tf_available():
TFDistilBertForSequenceClassification,
TFDistilBertForTokenClassification,
TFDistilBertForMultipleChoice,
TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST,
)
@@ -118,9 +119,7 @@ class TFDistilBertModelTester:
model = TFDistilBertForMaskedLM(config=config)
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
(prediction_scores,) = model(inputs)
result = {
"prediction_scores": prediction_scores.numpy(),
}
result = {"prediction_scores": prediction_scores.numpy()}
self.parent.assertListEqual(
list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
)
@@ -129,12 +128,12 @@ class TFDistilBertModelTester:
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFDistilBertForQuestionAnswering(config=config)
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
start_logits, end_logits = model(inputs)
result = {
"start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(),
inputs = {
"input_ids": input_ids,
"attention_mask": input_mask,
}
start_logits, end_logits = model(inputs)
result = {"start_logits": start_logits.numpy(), "end_logits": end_logits.numpy()}
self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
@@ -145,9 +144,7 @@ class TFDistilBertModelTester:
model = TFDistilBertForSequenceClassification(config)
inputs = {"input_ids": input_ids, "attention_mask": input_mask}
(logits,) = model(inputs)
result = {
"logits": logits.numpy(),
}
result = {"logits": logits.numpy()}
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
def create_and_check_distilbert_for_multiple_choice(
......@@ -162,9 +159,7 @@ class TFDistilBertModelTester:
"attention_mask": multiple_choice_input_mask,
}
(logits,) = model(inputs)
result = {
"logits": logits.numpy(),
}
result = {"logits": logits.numpy()}
self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
def create_and_check_distilbert_for_token_classification(
@@ -236,8 +231,8 @@ class TFDistilBertModelTest(TFModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_distilbert_for_token_classification(*config_and_inputs)
# @slow
# def test_model_from_pretrained(self):
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
# model = DistilBertModesss.from_pretrained(model_name)
# self.assertIsNotNone(model)
@slow
def test_model_from_pretrained(self):
for model_name in list(TF_DISTILBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]):
model = TFDistilBertModel.from_pretrained(model_name)
self.assertIsNotNone(model)
tests/test_modeling_tf_t5.py
@@ -77,6 +77,7 @@ class TFT5ModelTester:
eos_token_id=self.eos_token_id,
bos_token_id=self.pad_token_id,
pad_token_id=self.pad_token_id,
decoder_start_token_id=self.pad_token_id,
)
return (config, input_ids, input_mask, token_labels)
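
Setting decoder_start_token_id to the pad token follows the T5 convention: when decoder inputs are derived from labels, the sequence is shifted right and prepended with the decoder start token. A minimal sketch of that shift — illustrative, not the library's exact helper:

```python
import tensorflow as tf

def shift_right(labels, decoder_start_token_id):
    # Prepend the start token and drop the last label, so the decoder
    # predicts token i from tokens < i; for T5 the start token is the pad token.
    start = tf.ones_like(labels[:, :1]) * decoder_start_token_id
    return tf.concat([start, labels[:, :-1]], axis=-1)
```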
@@ -84,7 +85,7 @@ class TFT5ModelTester:
def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
model = TFT5Model(config=config)
inputs = {
"inputs": input_ids,
"input_ids": input_ids,
"decoder_input_ids": input_ids,
"decoder_attention_mask": input_mask,
}
@@ -115,7 +116,7 @@ class TFT5ModelTester:
def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
model = TFT5ForConditionalGeneration(config=config)
inputs_dict = {
"inputs": input_ids,
"input_ids": input_ids,
"decoder_input_ids": input_ids,
"decoder_attention_mask": input_mask,
}
@@ -209,7 +210,7 @@ class TFT5ModelTester:
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, token_labels) = config_and_inputs
inputs_dict = {
"inputs": input_ids,
"input_ids": input_ids,
"decoder_input_ids": input_ids,
"decoder_attention_mask": input_mask,
"use_cache": tf.convert_to_tensor([False]),
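
After these renames, TF T5 is called like every other model, with `input_ids` instead of the old T5-only `inputs` keyword (which the PR keeps working for backward compatibility). An illustrative usage sketch:

```python
from transformers import T5Tokenizer, TFT5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = TFT5ForConditionalGeneration.from_pretrained("t5-small")

batch = tokenizer("translate English to German: Hello.", return_tensors="tf")
outputs = model(
    input_ids=batch["input_ids"],          # previously the "inputs" keyword
    decoder_input_ids=batch["input_ids"],  # echoing the inputs, as the tests above do
)
lm_logits = outputs[0]
```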