Unverified Commit 2de2c9ec authored by NielsRogge, committed by GitHub

Clean up vision tests (#17024)



* Clean up tests

* Make fixup
Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
parent 4be8b95a
@@ -96,9 +96,9 @@ class BeitModelTester:
         self.out_indices = out_indices
         self.num_labels = num_labels
 
-        # in BeiT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
+        # in BeiT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
-        self.expected_seq_length = num_patches + 1
+        self.seq_length = num_patches + 1
 
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
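The renamed `seq_length` attribute follows the standard ViT patch arithmetic. A minimal illustrative sketch (the 224-pixel image and 16-pixel patch sizes are assumptions in the ViT-Base style, not the tester's toy values):

```python
# Not part of the commit: worked example of the seq_length computation above.
image_size = 224  # assumed input resolution
patch_size = 16   # assumed patch resolution

num_patches = (image_size // patch_size) ** 2  # (224 // 16) ** 2 = 14 ** 2 = 196
seq_length = num_patches + 1                   # +1 for the [CLS] token

print(num_patches, seq_length)  # 196 197
```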
@@ -136,16 +136,14 @@ class BeitModelTester:
         model.to(torch_device)
         model.eval()
         result = model(pixel_values)
-        self.parent.assertEqual(
-            result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
-        )
+        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
 
     def create_and_check_for_masked_lm(self, config, pixel_values, labels, pixel_labels):
         model = BeitForMaskedImageModeling(config=config)
         model.to(torch_device)
         model.eval()
         result = model(pixel_values)
-        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.expected_seq_length - 1, self.vocab_size))
+        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length - 1, self.vocab_size))
 
     def create_and_check_for_image_classification(self, config, pixel_values, labels, pixel_labels):
         config.num_labels = self.type_sequence_label_size
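The `seq_length - 1` in the masked-LM assertion above reflects that `BeitForMaskedImageModeling` scores only the patch positions, dropping the [CLS] slot from its logits. A hedged shape sketch (all numbers are assumptions for illustration, not values from the tester):

```python
# Illustrative shapes only; batch_size=2, seq_length=197, hidden_size=768 and
# vocab_size=8192 are assumed.
last_hidden_state_shape = (2, 197, 768)  # [CLS] + 196 patch positions
mim_logits_shape = (2, 197 - 1, 8192)    # patch positions only, [CLS] dropped
```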
@@ -155,7 +153,7 @@ class BeitModelTester:
         result = model(pixel_values, labels=labels)
         self.parent.assertEqual(result.logits.shape, (self.batch_size, self.type_sequence_label_size))
 
-    def create_and_check_for_image_segmentation(self, config, pixel_values, labels, pixel_labels):
+    def create_and_check_for_semantic_segmentation(self, config, pixel_values, labels, pixel_labels):
         config.num_labels = self.num_labels
         model = BeitForSemanticSegmentation(config)
         model.to(torch_device)
@@ -200,8 +198,8 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
+    @unittest.skip(reason="BEiT does not use inputs_embeds")
     def test_inputs_embeds(self):
-        # BEiT does not use inputs_embeds
         pass
 
     def test_model_common_attributes(self):
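This hunk shows a pattern applied throughout the commit: a test body that silently passed is now decorated with `@unittest.skip`, so the reason surfaces in the runner's report instead of living in a comment. A self-contained sketch of the pattern (names are generic, not from the repo):

```python
import unittest


class ExampleModelTest(unittest.TestCase):
    # The runner reports this test as skipped, with the reason attached,
    # instead of counting an empty body as a pass.
    @unittest.skip(reason="this model does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass


if __name__ == "__main__":
    unittest.main()
```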
@@ -229,9 +227,17 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
-    def test_for_image_segmentation(self):
+    def test_for_masked_lm(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
+
+    def test_for_image_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
+
+    def test_for_semantic_segmentation(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_for_image_segmentation(*config_and_inputs)
+        self.model_tester.create_and_check_for_semantic_segmentation(*config_and_inputs)
 
     def test_training(self):
         if not self.model_tester.is_training:
@@ -267,13 +273,7 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
                 or not model_class.supports_gradient_checkpointing
             ):
                 continue
-            # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING
-            # this can then be incorporated into _prepare_for_class in test_modeling_common.py
-            elif model_class.__name__ == "BeitForSemanticSegmentation":
-                batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape
-                inputs_dict["labels"] = torch.zeros(
-                    [self.model_tester.batch_size, height, width], device=torch_device
-                ).long()
+
             model = model_class(config)
             model.gradient_checkpointing_enable()
             model.to(torch_device)
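The deleted branch built segmentation labels inline; per its TODO, that construction belongs in a shared `_prepare_for_class` once a semantic-segmentation mapping exists. A hedged, self-contained sketch of the same label construction (the toy shapes and `torch_device` value are assumptions):

```python
import torch

torch_device = "cpu"  # assumption for this sketch
inputs_dict = {"pixel_values": torch.rand(2, 3, 30, 30)}  # toy batch

# Same construction as the removed elif-branch: one zero-initialized
# class-index map per image, matching the input's spatial size.
batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape
inputs_dict["labels"] = torch.zeros(batch_size, height, width, dtype=torch.long, device=torch_device)

print(inputs_dict["labels"].shape)  # torch.Size([2, 30, 30])
```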
@@ -300,106 +300,6 @@ class BeitModelTest(ModelTesterMixin, unittest.TestCase):
                     msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                 )
 
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        # BEiT has a different seq_length
-        seq_len = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            self.assertEqual(out_len + 1, len(outputs))
-
-            self_attentions = outputs.attentions
-
-            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(self_attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            hidden_states = outputs.hidden_states
-
-            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-            self.assertEqual(len(hidden_states), expected_num_layers)
-
-            # BEiT has a different seq_length
-            seq_length = self.model_tester.expected_seq_length
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [seq_length, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-    def test_for_masked_lm(self):
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)
-
-    def test_for_image_classification(self):
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
@@ -75,9 +75,9 @@ class FlaxBeitModelTester(unittest.TestCase):
         self.type_sequence_label_size = type_sequence_label_size
         self.initializer_range = initializer_range
 
-        # in BeiT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
+        # in BeiT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
-        self.expected_seq_length = num_patches + 1
+        self.seq_length = num_patches + 1
 
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
@@ -108,14 +108,12 @@ class FlaxBeitModelTester(unittest.TestCase):
         model = FlaxBeitModel(config=config)
         result = model(pixel_values)
-        self.parent.assertEqual(
-            result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
-        )
+        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
 
     def create_and_check_for_masked_lm(self, config, pixel_values, labels):
         model = FlaxBeitForMaskedImageModeling(config=config)
         result = model(pixel_values)
-        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.expected_seq_length - 1, self.vocab_size))
+        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length - 1, self.vocab_size))
 
     def create_and_check_for_image_classification(self, config, pixel_values, labels):
         config.num_labels = self.type_sequence_label_size
@@ -148,51 +146,7 @@ class FlaxBeitModelTest(FlaxModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
-    # We need to override this test because in Beit, the seq_len equals the number of patches + 1
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        seq_length = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_length, seq_length],
-            )
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            added_hidden_states = 1
-            self.assertEqual(out_len + added_hidden_states, len(outputs))
-
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_length, seq_length],
-            )
-
-    # We neeed to override this test because Beit's forward signature is different than text models.
+    # We need to override this test because Beit's forward signature is different than text models.
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -229,34 +183,6 @@ class FlaxBeitModelTest(FlaxModelTesterMixin, unittest.TestCase):
         for jitted_output, output in zip(jitted_outputs, outputs):
             self.assertEqual(jitted_output.shape, output.shape)
 
-    # We need to override this test because in Beit, the seq_len equals the number of patches + 1
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            seq_length = self.model_tester.expected_seq_length
-
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            hidden_states = outputs.hidden_states
-            self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [seq_length, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
     def test_model(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
......
@@ -92,9 +92,9 @@ class DeiTModelTester:
         self.scope = scope
         self.encoder_stride = encoder_stride
 
-        # in DeiT, the expected seq_len equals the number of patches + 2 (we add 2 for the [CLS] and distilation tokens)
+        # in DeiT, the seq length equals the number of patches + 2 (we add 2 for the [CLS] and distillation tokens)
         num_patches = (image_size // patch_size) ** 2
-        self.expected_seq_length = num_patches + 2
+        self.seq_length = num_patches + 2
 
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
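DeiT's only twist on the patch arithmetic is the `+ 2`: it prepends a distillation token alongside [CLS]. An illustrative sketch (224/16 sizes assumed, as before, not the tester's values):

```python
# Not from the tester: DeiT sequence length with assumed 224/16 sizes.
image_size, patch_size = 224, 16
num_patches = (image_size // patch_size) ** 2  # 196
seq_length = num_patches + 2                   # [CLS] + distillation token -> 198
```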
@@ -129,9 +129,7 @@ class DeiTModelTester:
         model.to(torch_device)
         model.eval()
         result = model(pixel_values)
-        self.parent.assertEqual(
-            result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
-        )
+        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
 
     def create_and_check_for_image_classification(self, config, pixel_values, labels):
         config.num_labels = self.type_sequence_label_size
@@ -181,8 +179,8 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
+    @unittest.skip(reason="DeiT does not use inputs_embeds")
     def test_inputs_embeds(self):
-        # DeiT does not use inputs_embeds
         pass
 
     def test_model_common_attributes(self):
@@ -210,94 +208,9 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        seq_len = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            self.assertEqual(out_len + 1, len(outputs))
-
-            self_attentions = outputs.attentions
-
-            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(self_attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            hidden_states = outputs.hidden_states
-
-            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-            self.assertEqual(len(hidden_states), expected_num_layers)
-
-            seq_length = self.model_tester.expected_seq_length
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [seq_length, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-            check_hidden_states_output(inputs_dict, config, model_class)
+    def test_for_image_classification(self):
+        config_and_inputs = self.model_tester.prepare_config_and_inputs()
+        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
 
     # special case for DeiTForImageClassificationWithTeacher model
     def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
@@ -403,10 +316,6 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
             loss.backward()
 
-    def test_for_image_classification(self):
-        config_and_inputs = self.model_tester.prepare_config_and_inputs()
-        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
-
     @slow
     def test_model_from_pretrained(self):
         for model_name in DEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
......
@@ -81,9 +81,9 @@ class DPTModelTester:
         self.initializer_range = initializer_range
         self.num_labels = num_labels
         self.scope = scope
-        # expected sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
+        # sequence length of DPT = num_patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
-        self.expected_seq_length = num_patches + 1
+        self.seq_length = num_patches + 1
 
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
@@ -118,9 +118,7 @@ class DPTModelTester:
         model.to(torch_device)
         model.eval()
         result = model(pixel_values)
-        self.parent.assertEqual(
-            result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
-        )
+        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
 
     def create_and_check_for_depth_estimation(self, config, pixel_values, labels):
         config.num_labels = self.num_labels
@@ -167,8 +165,8 @@ class DPTModelTest(ModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
+    @unittest.skip(reason="DPT does not use inputs_embeds")
     def test_inputs_embeds(self):
-        # DPT does not use inputs_embeds
        pass
 
     def test_model_common_attributes(self):
@@ -204,97 +202,6 @@ class DPTModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_semantic_segmentation(*config_and_inputs)
 
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        # in DPT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
-        seq_len = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            self.assertEqual(len(outputs.attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            self.assertEqual(out_len + 1, len(outputs))
-
-            self_attentions = outputs.attentions
-
-            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(self_attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            hidden_states = outputs.hidden_states
-
-            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-            self.assertEqual(len(hidden_states), expected_num_layers)
-
-            # DPT has a different seq_length
-            seq_len = self.model_tester.expected_seq_length
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [seq_len, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
     def test_training(self):
         for model_class in self.all_model_classes:
             if model_class.__name__ == "DPTForDepthEstimation":
......
@@ -67,9 +67,9 @@ class FlaxViTModelTester(unittest.TestCase):
         self.type_sequence_label_size = type_sequence_label_size
         self.initializer_range = initializer_range
 
-        # in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
+        # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
        num_patches = (image_size // patch_size) ** 2
-        self.expected_seq_length = num_patches + 1
+        self.seq_length = num_patches + 1
 
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
@@ -123,50 +123,6 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
-    # We need to override this test because in ViT, the seq_len equals the number of patches + 1
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        seq_length = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_length, seq_length],
-            )
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            added_hidden_states = 1
-            self.assertEqual(out_len + added_hidden_states, len(outputs))
-
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_length, seq_length],
-            )
-
     # We need to override this test because ViT's forward signature is different than text models.
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -180,7 +136,7 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase):
         expected_arg_names = ["pixel_values"]
         self.assertListEqual(arg_names[:1], expected_arg_names)
 
-    # We neeed to override this test because ViT expects pixel_values instead of input_ids
+    # We need to override this test because ViT expects pixel_values instead of input_ids
     def test_jit_compilation(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -204,35 +160,6 @@ class FlaxViTModelTest(FlaxModelTesterMixin, unittest.TestCase):
         for jitted_output, output in zip(jitted_outputs, outputs):
             self.assertEqual(jitted_output.shape, output.shape)
 
-    # We need to override this test because in ViT, the seq_len equals the number of patches + 1
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            seq_length = self.model_tester.expected_seq_length
-
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            hidden_states = outputs.hidden_states
-            self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [seq_length, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
     @slow
     def test_model_from_pretrained(self):
         for model_class_name in self.all_model_classes:
......
@@ -16,12 +16,10 @@
 
 import inspect
-import os
-import tempfile
 import unittest
 
 from transformers import ViTConfig
-from transformers.testing_utils import require_tf, require_vision, slow, tooslow
+from transformers.testing_utils import require_tf, require_vision, slow
 from transformers.utils import cached_property, is_tf_available, is_vision_available
 
 from ..test_configuration_common import ConfigTester
@@ -80,9 +78,9 @@ class TFViTModelTester:
         self.initializer_range = initializer_range
         self.scope = scope
 
-        # in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
+        # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
-        self.expected_seq_length = num_patches + 1
+        self.seq_length = num_patches + 1
 
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
@@ -114,18 +112,14 @@ class TFViTModelTester:
     def create_and_check_model(self, config, pixel_values, labels):
         model = TFViTModel(config=config)
         result = model(pixel_values, training=False)
-        self.parent.assertEqual(
-            result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
-        )
+        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
 
         # Test with an image with different size than the one specified in config.
         image_size = self.image_size // 2
         pixel_values = pixel_values[:, :, :image_size, :image_size]
         result = model(pixel_values, interpolate_pos_encoding=True, training=False)
-        expected_seq_length = (image_size // self.patch_size) ** 2 + 1
-        self.parent.assertEqual(
-            result.last_hidden_state.shape, (self.batch_size, expected_seq_length, self.hidden_size)
-        )
+        seq_length = (image_size // self.patch_size) ** 2 + 1
+        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, seq_length, self.hidden_size))
 
     def create_and_check_for_image_classification(self, config, pixel_values, labels):
         config.num_labels = self.type_sequence_label_size
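The second assertion in this hunk exercises `interpolate_pos_encoding=True`: halving the input resolution shrinks the patch grid, and the expected length is recomputed from the cropped size. A hedged arithmetic sketch (224/16 sizes assumed for readability, not the tester's values):

```python
# Illustrative only: how the sequence length changes when the image is halved.
image_size, patch_size = 224, 16
seq_full = (image_size // patch_size) ** 2 + 1  # 14**2 + 1 = 197
half_size = image_size // 2                     # 112
seq_half = (half_size // patch_size) ** 2 + 1   # 7**2 + 1 = 50
```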
@@ -166,12 +160,12 @@ class TFViTModelTest(TFModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
+    @unittest.skip(reason="ViT does not use inputs_embeds")
     def test_inputs_embeds(self):
-        # ViT does not use inputs_embeds
         pass
 
+    @unittest.skip(reason="ViT does not use inputs_embeds")
     def test_graph_mode_with_inputs_embeds(self):
-        # ViT does not use inputs_embeds
         pass
 
     def test_model_common_attributes(self):
@@ -199,131 +193,6 @@ class TFViTModelTest(TFModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
-    # overwrite from common since `encoder_seq_length` and `encoder_key_length` are calculated
-    # in a different way than in text models.
-    @tooslow
-    def test_saved_model_creation_extended(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.output_hidden_states = True
-        config.output_attentions = True
-
-        if hasattr(config, "use_cache"):
-            config.use_cache = True
-
-        # in ViT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
-        seq_len = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-            model = model_class(config)
-            num_out = len(model(class_inputs_dict))
-
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname, saved_model=True)
-                saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
-                model = tf.keras.models.load_model(saved_model_dir)
-                outputs = model(class_inputs_dict)
-
-                output_hidden_states = outputs["hidden_states"]
-                output_attentions = outputs["attentions"]
-
-                self.assertEqual(len(outputs), num_out)
-
-                expected_num_layers = getattr(
-                    self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-                )
-
-                self.assertEqual(len(output_hidden_states), expected_num_layers)
-                self.assertListEqual(
-                    list(output_hidden_states[0].shape[-2:]),
-                    [seq_len, self.model_tester.hidden_size],
-                )
-
-                self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers)
-                self.assertListEqual(
-                    list(output_attentions[0].shape[-3:]),
-                    [self.model_tester.num_attention_heads, seq_len, seq_len],
-                )
-
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        # in ViT, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
-        seq_len = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class), training=False)
-            self.assertEqual(out_len + 1, len(outputs))
-
-            self_attentions = outputs.attentions
-
-            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(self_attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            hidden_states = outputs.hidden_states
-
-            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-            self.assertEqual(len(hidden_states), expected_num_layers)
-
-            # ViT has a different seq_length
-            seq_length = self.model_tester.expected_seq_length
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [seq_length, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
     def test_for_image_classification(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
......
@@ -81,9 +81,9 @@ class ViTModelTester:
         self.scope = scope
         self.encoder_stride = encoder_stride
 
-        # in ViT, the expected seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
+        # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
         num_patches = (image_size // patch_size) ** 2
-        self.expected_seq_length = num_patches + 1
+        self.seq_length = num_patches + 1
 
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
@@ -118,9 +118,7 @@ class ViTModelTester:
         model.to(torch_device)
         model.eval()
         result = model(pixel_values)
-        self.parent.assertEqual(
-            result.last_hidden_state.shape, (self.batch_size, self.expected_seq_length, self.hidden_size)
-        )
+        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
 
     def create_and_check_for_image_classification(self, config, pixel_values, labels):
         config.num_labels = self.type_sequence_label_size
@@ -169,8 +167,8 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
+    @unittest.skip(reason="ViT does not use inputs_embeds")
     def test_inputs_embeds(self):
-        # ViT does not use inputs_embeds
         pass
 
     def test_model_common_attributes(self):
@@ -198,93 +196,6 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        seq_len = self.model_tester.expected_seq_length
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            self.assertListEqual(
-                list(attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            self.assertEqual(out_len + 1, len(outputs))
-
-            self_attentions = outputs.attentions
-
-            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(self_attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            hidden_states = outputs.hidden_states
-
-            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-            self.assertEqual(len(hidden_states), expected_num_layers)
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [self.model_tester.expected_seq_length, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
     def test_for_image_classification(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_image_classification(*config_and_inputs)
......