"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "2d184cb553ee20943b03b253f44300e466357871"
Unverified commit dfc76b25 authored by amyeroberts, committed by GitHub

has_attentions - consistent test skipping logic and tf tests (#17495)

parent 66e86567
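For reference, the skipping pattern introduced across the model test files below is a plain `unittest.skip` override: a model whose architecture has no attention layers overrides `test_attention_outputs` and reports it as skipped, instead of letting the shared test silently no-op. A minimal, self-contained sketch of that pattern follows; the class names and the `has_attentions` usage here are illustrative stand-ins, not code from this commit.

import unittest


class DummyTesterMixin:
    # Stand-in for a shared ModelTesterMixin-style base: the common test assumes
    # the model returns attention tensors.
    has_attentions = True

    def test_attention_outputs(self):
        # The real mixin runs the model and inspects its attention outputs; here
        # we only assert the flag so the sketch stays self-contained.
        self.assertTrue(self.has_attentions)


class DummyConvOnlyModelTest(DummyTesterMixin, unittest.TestCase):
    # Stand-in for a ConvNext/ResNet-style test class: no attention layers.
    has_attentions = False

    @unittest.skip(reason="This model does not output attentions")
    def test_attention_outputs(self):
        # Overriding with unittest.skip reports the test as skipped rather than
        # letting the shared implementation pass vacuously.
        pass


if __name__ == "__main__":
    unittest.main()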
@@ -158,6 +158,10 @@ class ConvNextModelTest(ModelTesterMixin, unittest.TestCase):
     def create_and_test_config_common_properties(self):
         return
 
+    @unittest.skip(reason="ConvNext does not output attentions")
+    def test_attention_outputs(self):
+        pass
+
     @unittest.skip(reason="ConvNext does not use inputs_embeds")
     def test_inputs_embeds(self):
         pass
...
@@ -173,6 +173,10 @@ class CvtModelTest(ModelTesterMixin, unittest.TestCase):
     def create_and_test_config_common_properties(self):
         return
 
+    @unittest.skip(reason="Cvt does not output attentions")
+    def test_attention_outputs(self):
+        pass
+
     @unittest.skip(reason="Cvt does not use inputs_embeds")
     def test_inputs_embeds(self):
         pass
...
@@ -695,6 +695,10 @@ class FlavaImageCodebookTest(ModelTesterMixin, unittest.TestCase):
         expected_arg_names = ["pixel_values"]
         self.assertListEqual(arg_names[:1], expected_arg_names)
 
+    @unittest.skip(reason="Flava does not output attentions")
+    def test_attention_outputs(self):
+        pass
+
     def test_model_common_attributes(self):
         # No embedding in multimodal model
         pass
...
@@ -142,6 +142,10 @@ class PoolFormerModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
+    @unittest.skip(reason="PoolFormer does not output attentions")
+    def test_attention_outputs(self):
+        pass
+
     @unittest.skip("PoolFormer does not use inputs_embeds")
     def test_inputs_embeds(self):
         pass
...
@@ -147,6 +147,10 @@ class RegNetModelTest(ModelTesterMixin, unittest.TestCase):
     def create_and_test_config_common_properties(self):
         return
 
+    @unittest.skip(reason="RegNet does not output attentions")
+    def test_attention_outputs(self):
+        pass
+
     @unittest.skip(reason="RegNet does not use inputs_embeds")
     def test_inputs_embeds(self):
         pass
...
@@ -147,6 +147,10 @@ class ResNetModelTest(ModelTesterMixin, unittest.TestCase):
     def create_and_test_config_common_properties(self):
         return
 
+    @unittest.skip(reason="ResNet does not output attentions")
+    def test_attention_outputs(self):
+        pass
+
     @unittest.skip(reason="ResNet does not use inputs_embeds")
     def test_inputs_embeds(self):
         pass
...
@@ -144,6 +144,10 @@ class VanModelTest(ModelTesterMixin, unittest.TestCase):
     def create_and_test_config_common_properties(self):
         return
 
+    @unittest.skip(reason="Van does not output attentions")
+    def test_attention_outputs(self):
+        pass
+
     @unittest.skip(reason="Van does not use inputs_embeds")
     def test_inputs_embeds(self):
         pass
...
@@ -485,123 +485,119 @@ class ModelTesterMixin:
             loss.backward()
 
     def test_attention_outputs(self):
-        if not self.has_attentions:
-            pass
-        else:
-            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-            config.return_dict = True
...  (the remaining removed lines are the same block as the added lines below, indented one level deeper under the else: branch)
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        config.return_dict = True
+
+        seq_len = getattr(self.model_tester, "seq_length", None)
+        decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len)
+        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len)
+        decoder_key_length = getattr(self.model_tester, "decoder_key_length", decoder_seq_length)
+        encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
+        chunk_length = getattr(self.model_tester, "chunk_length", None)
+        if chunk_length is not None and hasattr(self.model_tester, "num_hashes"):
+            encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes
+
+        for model_class in self.all_model_classes:
+            inputs_dict["output_attentions"] = True
+            inputs_dict["output_hidden_states"] = False
+            config.return_dict = True
+            model = model_class(config)
+            model.to(torch_device)
+            model.eval()
+            with torch.no_grad():
+                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
+            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
+            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
+
+            # check that output_attentions also work using config
+            del inputs_dict["output_attentions"]
+            config.output_attentions = True
+            model = model_class(config)
+            model.to(torch_device)
+            model.eval()
+            with torch.no_grad():
+                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
+            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
+            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
+
+            if chunk_length is not None:
+                self.assertListEqual(
+                    list(attentions[0].shape[-4:]),
+                    [self.model_tester.num_attention_heads, encoder_seq_length, chunk_length, encoder_key_length],
+                )
+            else:
+                self.assertListEqual(
+                    list(attentions[0].shape[-3:]),
+                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
+                )
+            out_len = len(outputs)
+
+            if self.is_encoder_decoder:
+                correct_outlen = 5
+
+                # loss is at first position
+                if "labels" in inputs_dict:
+                    correct_outlen += 1  # loss is added to beginning
+                # Question Answering model returns start_logits and end_logits
+                if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
+                    correct_outlen += 1  # start_logits and end_logits instead of only 1 output
+                if "past_key_values" in outputs:
+                    correct_outlen += 1  # past_key_values have been returned
+
+                self.assertEqual(out_len, correct_outlen)
+
+                # decoder attentions
+                decoder_attentions = outputs.decoder_attentions
+                self.assertIsInstance(decoder_attentions, (list, tuple))
+                self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
+                self.assertListEqual(
+                    list(decoder_attentions[0].shape[-3:]),
+                    [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
+                )
+
+                # cross attentions
+                cross_attentions = outputs.cross_attentions
+                self.assertIsInstance(cross_attentions, (list, tuple))
+                self.assertEqual(len(cross_attentions), self.model_tester.num_hidden_layers)
+                self.assertListEqual(
+                    list(cross_attentions[0].shape[-3:]),
+                    [
+                        self.model_tester.num_attention_heads,
+                        decoder_seq_length,
+                        encoder_key_length,
+                    ],
+                )
+
+            # Check attention is always last and order is fine
+            inputs_dict["output_attentions"] = True
+            inputs_dict["output_hidden_states"] = True
+            model = model_class(config)
+            model.to(torch_device)
+            model.eval()
+            with torch.no_grad():
+                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
+
+            if hasattr(self.model_tester, "num_hidden_states_types"):
+                added_hidden_states = self.model_tester.num_hidden_states_types
+            elif self.is_encoder_decoder:
+                added_hidden_states = 2
+            else:
+                added_hidden_states = 1
+            self.assertEqual(out_len + added_hidden_states, len(outputs))
+
+            self_attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
+
+            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
+            if chunk_length is not None:
+                self.assertListEqual(
+                    list(self_attentions[0].shape[-4:]),
+                    [self.model_tester.num_attention_heads, encoder_seq_length, chunk_length, encoder_key_length],
+                )
+            else:
+                self.assertListEqual(
+                    list(self_attentions[0].shape[-3:]),
+                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
+                )
 
     @slow
     def test_torchscript_simple(self):
...
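The TensorFlow hunks that follow take the complementary approach for the shared equivalence test: only the attention-specific checks are gated on the mixin-level `has_attentions` flag, so models without attention layers still run the rest of the test. A rough, runnable sketch of that gating is shown below; all names are illustrative stand-ins, not the transformers TF test helpers.

# All names here are illustrative stand-ins, not the transformers TF test helpers.
class FakeEquivalenceSuite:
    has_attentions = False  # e.g. a convolution-only architecture

    def check_equivalence(self, options=None):
        print(f"checked tuple/dict equivalence with options={options}")

    def run_all_checks(self):
        # Checks that apply to every model.
        self.check_equivalence()
        self.check_equivalence({"output_hidden_states": True})
        # Attention checks are gated rather than the whole test being skipped:
        # classes that set has_attentions = False never request attention outputs.
        if self.has_attentions:
            self.check_equivalence({"output_attentions": True})


if __name__ == "__main__":
    FakeEquivalenceSuite().run_all_checks()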
@@ -978,9 +978,10 @@ class TFModelTesterMixin:
             dict_inputs = self._prepare_for_class(inputs_dict, model_class)
             check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})
 
-            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
-            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
-            check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})
+            if self.has_attentions:
+                tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
+                dict_inputs = self._prepare_for_class(inputs_dict, model_class)
+                check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})
 
             # Not all models accept "labels" in the forward pass (yet :) )
             if "labels" in inspect.signature(model.call).parameters.keys():
@@ -992,15 +993,16 @@ class TFModelTesterMixin:
                 dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
                 check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})
 
-                tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
-                dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
-                check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})
+                if self.has_attentions:
+                    tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+                    dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
+                    check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})
 
                 tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
                 dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
                 check_equivalence(
                     model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True}
                 )
 
     def test_inputs_embeds(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...