"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "ae67b2439fb15954bfd8f0fdf521cf1a650bafb9"
Unverified Commit 374a2f69 authored by NielsRogge, committed by GitHub

Clean up CLIP tests (#17380)


Co-authored-by: Niels Rogge <nielsrogge@Nielss-MacBook-Pro.local>
parent d9809298
@@ -100,6 +100,10 @@ class CLIPVisionModelTester:
         self.initializer_range = initializer_range
         self.scope = scope
 
+        # in ViT, the seq length equals the number of patches + 1 (we add 1 for the [CLS] token)
+        num_patches = (image_size // patch_size) ** 2
+        self.seq_length = num_patches + 1
+
     def prepare_config_and_inputs(self):
         pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
         config = self.get_config()
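Note on the hunk above: the tester now precomputes the transformer sequence length once, in `__init__`, instead of re-deriving it inside every overridden test. A quick standalone check of the arithmetic, using illustrative values (`image_size=30` and `patch_size=2` are assumptions for the example, not necessarily the tester's actual defaults):

# Patch-count arithmetic from the hunk above, with made-up sizes.
image_size = 30  # assumed example value
patch_size = 2   # assumed example value
# A 30x30 image tiled by 2x2 patches gives (30 // 2) ** 2 = 225 patches.
num_patches = (image_size // patch_size) ** 2
# One [CLS] token is prepended, so the sequence length is 226.
seq_length = num_patches + 1
assert (num_patches, seq_length) == (225, 226)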
@@ -160,8 +164,8 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
     def test_config(self):
         self.config_tester.run_common_tests()
 
+    @unittest.skip(reason="CLIP does not use inputs_embeds")
     def test_inputs_embeds(self):
-        # CLIP does not use inputs_embeds
         pass
 
     def test_model_common_attributes(self):
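The change above is more than cosmetic: a test whose body is just `pass` is reported as passing, whereas `@unittest.skip` removes it from the pass count and records the reason. A minimal self-contained illustration (the test names are invented for the example):

import unittest

class SkipDemo(unittest.TestCase):
    def test_silent_stub(self):
        # Counted as "ok" - the runner cannot tell this was never a real test.
        pass

    @unittest.skip(reason="CLIP does not use inputs_embeds")
    def test_explicit_skip(self):
        # Counted as skipped; the reason is shown under `python -m unittest -v`
        # and tallied separately in the summary line.
        pass

if __name__ == "__main__":
    unittest.main(verbosity=2)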
@@ -189,114 +193,17 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
-    def test_attention_outputs(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.return_dict = True
-
-        # in CLIP, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
-        image_size = (self.model_tester.image_size, self.model_tester.image_size)
-        patch_size = (self.model_tester.patch_size, self.model_tester.patch_size)
-        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
-        seq_len = num_patches + 1
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = False
-            config.return_dict = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            # check that output_attentions also work using config
-            del inputs_dict["output_attentions"]
-            config.output_attentions = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-            attentions = outputs.attentions
-            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
-
-            out_len = len(outputs)
-
-            # Check attention is always last and order is fine
-            inputs_dict["output_attentions"] = True
-            inputs_dict["output_hidden_states"] = True
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            added_hidden_states = 1
-            self.assertEqual(out_len + added_hidden_states, len(outputs))
-
-            self_attentions = outputs.attentions
-
-            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
-            self.assertListEqual(
-                list(self_attentions[0].shape[-3:]),
-                [self.model_tester.num_attention_heads, seq_len, seq_len],
-            )
-
-    def test_hidden_states_output(self):
-        def check_hidden_states_output(inputs_dict, config, model_class):
-            model = model_class(config)
-            model.to(torch_device)
-            model.eval()
-
-            with torch.no_grad():
-                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
-
-            hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states
-
-            expected_num_layers = getattr(
-                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-            )
-            self.assertEqual(len(hidden_states), expected_num_layers)
-
-            # CLIP has a different seq_length
-            image_size = (self.model_tester.image_size, self.model_tester.image_size)
-            patch_size = (self.model_tester.patch_size, self.model_tester.patch_size)
-            num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
-            seq_length = num_patches + 1
-
-            self.assertListEqual(
-                list(hidden_states[0].shape[-2:]),
-                [seq_length, self.model_tester.hidden_size],
-            )
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            inputs_dict["output_hidden_states"] = True
-            check_hidden_states_output(inputs_dict, config, model_class)
-
-            # check that output_hidden_states also work using config
-            del inputs_dict["output_hidden_states"]
-            config.output_hidden_states = True
-
-            check_hidden_states_output(inputs_dict, config, model_class)
-
     def test_training(self):
         pass
 
     def test_training_gradient_checkpointing(self):
         pass
 
-    # skip this test as CLIPVisionModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="CLIPVisionModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_from_base(self):
         pass
 
-    # skip this test as CLIPVisionModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="CLIPVisionModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_to_base(self):
         pass
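The two overrides deleted above (`test_attention_outputs` and `test_hidden_states_output`) existed only to recompute CLIP's patch-based sequence length before checking output shapes. Since the first hunk now stores `self.seq_length` on `CLIPVisionModelTester`, the shared `ModelTesterMixin` versions of those tests can pick the value up from the tester, making the copies redundant. A self-contained sketch of that handshake (`DummyTester` and the test body are illustrative stand-ins, not the real mixin code):

import unittest

class DummyTester:
    # Stand-in for CLIPVisionModelTester; values are illustrative, not the real defaults.
    def __init__(self, image_size=30, patch_size=2, num_attention_heads=4):
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_attention_heads = num_attention_heads
        # Same bookkeeping the first hunk adds to the tester's __init__.
        self.seq_length = (image_size // patch_size) ** 2 + 1

class SharedShapeCheck(unittest.TestCase):
    def setUp(self):
        self.model_tester = DummyTester()

    def test_attention_shape_bookkeeping(self):
        # A shared test reads seq_length off the tester instead of
        # re-deriving it per model, which is what made the overrides obsolete.
        seq_len = self.model_tester.seq_length
        expected_shape = [self.model_tester.num_attention_heads, seq_len, seq_len]
        self.assertEqual(expected_shape, [4, 226, 226])

if __name__ == "__main__":
    unittest.main()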
@@ -416,17 +323,15 @@ class CLIPTextModelTest(ModelTesterMixin, unittest.TestCase):
     def test_training_gradient_checkpointing(self):
         pass
 
+    @unittest.skip(reason="CLIP does not use inputs_embeds")
     def test_inputs_embeds(self):
-        # CLIP does not use inputs_embeds
         pass
 
-    # skip this test as CLIPTextModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="CLIPTextModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_from_base(self):
         pass
 
-    # skip this test as CLIPTextModel has no base class and is
-    # not available in MODEL_MAPPING
+    @unittest.skip(reason="CLIPTextModel has no base class and is not available in MODEL_MAPPING")
     def test_save_load_fast_init_to_base(self):
         pass
@@ -495,19 +400,19 @@ class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
-    # hidden_states are tested in individual model tests
+    @unittest.skip(reason="Hidden_states is tested in individual model tests")
     def test_hidden_states_output(self):
         pass
 
-    # input_embeds are tested in individual model tests
+    @unittest.skip(reason="Inputs_embeds is tested in individual model tests")
     def test_inputs_embeds(self):
         pass
 
-    # tested in individual model tests
+    @unittest.skip(reason="Retain_grad is tested in individual model tests")
     def test_retain_grad_hidden_states_attentions(self):
         pass
 
-    # CLIPModel does not have input/output embeddings
+    @unittest.skip(reason="CLIPModel does not have input/output embeddings")
     def test_model_common_attributes(self):
         pass