Unverified Commit 1de7dc74 authored by amyeroberts, committed by GitHub

Skip tests properly (#31308)

* Skip tests properly

* [test_all]

* Add 'reason' as kwarg for skipTest

* [test_all] Fix up

* [test_all]
parent 1f9f57ab
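
The recurring change in every file below: tests that were silently passed over with a bare `return` now call `self.skipTest(reason=...)`, and always-skipped tests get a `@unittest.skip(reason=...)` decorator instead of a comment above an empty body, so the skip and its reason show up in the test report. A minimal sketch of the two idioms, using plain `unittest` (the class and flag names here are hypothetical, not taken from the diff):

    import unittest


    class ExampleModelTest(unittest.TestCase):
        # Hypothetical feature flag, standing in for the tester-mixin attributes below.
        test_torchscript = False

        # Conditional skip: a bare `return` would report this test as passed;
        # skipTest(reason=...) reports it as skipped, with the reason attached.
        def test_torchscript_export(self):
            if not self.test_torchscript:
                self.skipTest(reason="test_torchscript is set to False")

        # Unconditional skip: the decorator replaces a comment over an empty body,
        # so the reason is surfaced by the runner instead of being buried in source.
        @unittest.skip(reason="model has no inputs_embeds")
        def test_inputs_embeds(self):
            pass


    if __name__ == "__main__":
        unittest.main(verbosity=2)

Run with verbosity=2, both tests print as `skipped '...'` rather than `ok`, which is what makes the suite's real coverage visible.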
@@ -262,9 +262,11 @@ class GroupViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
        ],
    )

+    @unittest.skip
    def test_training(self):
        pass

+    @unittest.skip
    def test_training_gradient_checkpointing(self):
        pass
@@ -458,9 +460,11 @@ class GroupViTTextModelTest(ModelTesterMixin, unittest.TestCase):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

+    @unittest.skip
    def test_training(self):
        pass

+    @unittest.skip
    def test_training_gradient_checkpointing(self):
        pass
@@ -618,7 +622,7 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
    def _create_and_check_torchscript(self, config, inputs_dict):
        if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")

        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        configs_no_init.torchscript = True
...
@@ -95,7 +95,7 @@ class HerbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_rust_and_python_full_tokenizers(self):
        if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")

        tokenizer = self.get_tokenizer()
        rust_tokenizer = self.get_rust_tokenizer()
...
...@@ -350,22 +350,21 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -350,22 +350,21 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_labels_out_of_vocab(*config_and_inputs) self.model_tester.check_labels_out_of_vocab(*config_and_inputs)
# Hubert has no inputs_embeds @unittest.skip(reason="Hubert has no inputs_embeds")
def test_inputs_embeds(self): def test_inputs_embeds(self):
pass pass
# `input_ids` is renamed to `input_values` @unittest.skip(reason="Hubert has no inputs_embeds")
def test_forward_signature(self): def test_forward_signature(self):
pass pass
# Hubert cannot resize token embeddings # Hubert cannot resize token embeddings
# since it has no tokens embeddings # since it has no tokens embeddings
@unittest.skip(reason="Hubert has no tokens embeddings")
def test_resize_tokens_embeddings(self): def test_resize_tokens_embeddings(self):
pass pass
# Hubert has no inputs_embeds @unittest.skip(reason="Hubert has no inputs_embeds")
# and thus the `get_input_embeddings` fn
# is not implemented
def test_model_get_set_embeddings(self): def test_model_get_set_embeddings(self):
pass pass
...@@ -438,10 +437,10 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase): ...@@ -438,10 +437,10 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
# Hubert cannot be TorchScripted because of torch.nn.utils.weight_norm # Hubert cannot be TorchScripted because of torch.nn.utils.weight_norm
def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False): def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
# TODO: fix it # TODO: fix it
self.skipTest("torch 2.1 breaks torch fx tests for wav2vec2/hubert.") self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")
if not is_torch_fx_available() or not self.fx_compatible: if not is_torch_fx_available() or not self.fx_compatible:
return self.skipTest(reason="torch fx is not available or not compatible with this model")
configs_no_init = _config_zero_init(config) # To be sure we have no Nan configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.return_dict = False configs_no_init.return_dict = False
...@@ -615,22 +614,19 @@ class HubertRobustModelTest(ModelTesterMixin, unittest.TestCase): ...@@ -615,22 +614,19 @@ class HubertRobustModelTest(ModelTesterMixin, unittest.TestCase):
config_and_inputs = self.model_tester.prepare_config_and_inputs() config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.check_labels_out_of_vocab(*config_and_inputs) self.model_tester.check_labels_out_of_vocab(*config_and_inputs)
# Hubert has no inputs_embeds @unittest.skip(reason="Hubert has no inputs_embeds")
def test_inputs_embeds(self): def test_inputs_embeds(self):
pass pass
# `input_ids` is renamed to `input_values` @unittest.skip(reason="Hubert has input_values instead of input_ids")
def test_forward_signature(self): def test_forward_signature(self):
pass pass
# Hubert cannot resize token embeddings @unittest.skip(reason="Hubert has no tokens embeddings")
# since it has no tokens embeddings
def test_resize_tokens_embeddings(self): def test_resize_tokens_embeddings(self):
pass pass
# Hubert has no inputs_embeds @unittest.skip(reason="Hubert has no inputs_embeds")
# and thus the `get_input_embeddings` fn
# is not implemented
def test_model_get_set_embeddings(self): def test_model_get_set_embeddings(self):
pass pass
......
@@ -379,7 +379,7 @@ class IBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
        with torch.no_grad():
            model(**inputs)[0]

-    @unittest.skip("ibert overrides scaling to None if inputs_embeds")
+    @unittest.skip(reason="ibert overrides scaling to None if inputs_embeds")
    def test_inputs_embeds_matches_input_ids(self):
        pass
...
...@@ -191,18 +191,18 @@ class IdeficsImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): ...@@ -191,18 +191,18 @@ class IdeficsImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
torch.testing.assert_close(pixel_values_transform_implied, pixel_values_transform_supplied, rtol=0.0, atol=0.0) torch.testing.assert_close(pixel_values_transform_implied, pixel_values_transform_supplied, rtol=0.0, atol=0.0)
@unittest.skip("not supported") @unittest.skip(reason="not supported")
def test_call_numpy(self): def test_call_numpy(self):
pass pass
@unittest.skip("not supported") @unittest.skip(reason="not supported")
def test_call_numpy_4_channels(self): def test_call_numpy_4_channels(self):
pass pass
@unittest.skip("not supported") @unittest.skip(reason="not supported")
def test_call_pil(self): def test_call_pil(self):
pass pass
@unittest.skip("not supported") @unittest.skip(reason="not supported")
def test_call_pytorch(self): def test_call_pytorch(self):
pass pass
...@@ -316,7 +316,7 @@ class IdeficsModelTester: ...@@ -316,7 +316,7 @@ class IdeficsModelTester:
@slow @slow
@parameterized.expand([("float16",), ("bfloat16",), ("float32",)]) @parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
def test_eager_matches_sdpa_inference(self, torch_dtype: str): def test_eager_matches_sdpa_inference(self, torch_dtype: str):
self.skipTest("Idefics has a hard requirement on SDPA, skipping this test") self.skipTest(reason="Idefics has a hard requirement on SDPA, skipping this test")
@unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required") @unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required")
...@@ -422,13 +422,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) ...@@ -422,13 +422,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def test_training(self): def test_training(self):
if not self.model_tester.is_training: if not self.model_tester.is_training:
return self.skipTest(reason="model_tester.is_training is set to False")
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
# IdeficsModel does not support training, users should use # IdeficsModel does not support training, users should use
# IdeficsForVisionText2Text for this purpose # IdeficsForVisionText2Text for this purpose
if model_class == IdeficsModel: if model_class == IdeficsModel:
return self.skipTest(reason="IdeficsModel does not support training")
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.return_dict = True config.return_dict = True
...@@ -442,13 +442,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) ...@@ -442,13 +442,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def test_training_gradient_checkpointing(self): def test_training_gradient_checkpointing(self):
if not self.model_tester.is_training: if not self.model_tester.is_training:
return self.skipTest(reason="model_tester.is_training is set to False")
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
# IdeficsModel does not support training, users should use # IdeficsModel does not support training, users should use
# IdeficsForVisionText2Text for this purpose # IdeficsForVisionText2Text for this purpose
if model_class == IdeficsModel: if model_class == IdeficsModel:
return self.skipTest(reason="IdeficsModel does not support training")
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
config.use_cache = False config.use_cache = False
...@@ -575,7 +575,7 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) ...@@ -575,7 +575,7 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
@slow @slow
@parameterized.expand([("float16",), ("bfloat16",), ("float32",)]) @parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
def test_eager_matches_sdpa_inference(self, torch_dtype: str): def test_eager_matches_sdpa_inference(self, torch_dtype: str):
self.skipTest("Idefics has a hard requirement on SDPA, skipping this test") self.skipTest(reason="Idefics has a hard requirement on SDPA, skipping this test")
@unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required") @unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required")
...@@ -590,11 +590,11 @@ class IdeficsForVisionText2TextTest(IdeficsModelTest, unittest.TestCase): ...@@ -590,11 +590,11 @@ class IdeficsForVisionText2TextTest(IdeficsModelTest, unittest.TestCase):
) )
self.config_tester = ConfigTester(self, config_class=IdeficsConfig, hidden_size=37) self.config_tester = ConfigTester(self, config_class=IdeficsConfig, hidden_size=37)
@unittest.skip("We only test the model that takes in multiple images") @unittest.skip(reason="We only test the model that takes in multiple images")
def test_model(self): def test_model(self):
pass pass
@unittest.skip("We only test the model that takes in multiple images") @unittest.skip(reason="We only test the model that takes in multiple images")
def test_for_token_classification(self): def test_for_token_classification(self):
pass pass
......
...@@ -176,19 +176,19 @@ class Idefics2ModelTest(ModelTesterMixin, unittest.TestCase): ...@@ -176,19 +176,19 @@ class Idefics2ModelTest(ModelTesterMixin, unittest.TestCase):
self.model_tester = Idefics2VisionText2TextModelTester(self) self.model_tester = Idefics2VisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False) self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)
@unittest.skip("input_embeds cannot be passed in without input_ids") @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
def test_inputs_embeds(): def test_inputs_embeds():
pass pass
@unittest.skip("input_embeds cannot be passed in without input_ids") @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
def test_inputs_embeds_matches_input_ids(self): def test_inputs_embeds_matches_input_ids(self):
pass pass
@unittest.skip("Model does not support padding right") @unittest.skip(reason="Model does not support padding right")
def test_flash_attn_2_generate_padding_right(self): def test_flash_attn_2_generate_padding_right(self):
pass pass
@unittest.skip("Model does not support padding right") @unittest.skip(reason="Model does not support padding right")
def test_flash_attn_2_inference_padding_right(self): def test_flash_attn_2_inference_padding_right(self):
pass pass
...@@ -336,15 +336,15 @@ class Idefics2ForConditionalGenerationModelTest(GenerationTesterMixin, ModelTest ...@@ -336,15 +336,15 @@ class Idefics2ForConditionalGenerationModelTest(GenerationTesterMixin, ModelTest
self.model_tester = Idefics2VisionText2TextModelTester(self) self.model_tester = Idefics2VisionText2TextModelTester(self)
self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False) self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)
@unittest.skip("input_embeds cannot be passed in without input_ids") @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
def test_inputs_embeds(): def test_inputs_embeds():
pass pass
@unittest.skip("Model does not support padding right") @unittest.skip(reason="Model does not support padding right")
def test_flash_attn_2_generate_padding_right(self): def test_flash_attn_2_generate_padding_right(self):
pass pass
@unittest.skip("Model does not support padding right") @unittest.skip(reason="Model does not support padding right")
def test_flash_attn_2_inference_padding_right(self): def test_flash_attn_2_inference_padding_right(self):
pass pass
......
...@@ -176,7 +176,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): ...@@ -176,7 +176,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
else: else:
self.assertEqual(image_processor_first[key], value) self.assertEqual(image_processor_first[key], value)
@unittest.skip("ImageGPT requires clusters at initialization") @unittest.skip(reason="ImageGPT requires clusters at initialization")
def test_init_without_params(self): def test_init_without_params(self):
pass pass
...@@ -220,7 +220,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase): ...@@ -220,7 +220,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape) tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
) )
@unittest.skip("ImageGPT assumes clusters for 3 channels") @unittest.skip(reason="ImageGPT assumes clusters for 3 channels")
def test_call_numpy_4_channels(self): def test_call_numpy_4_channels(self):
pass pass
......
...@@ -357,7 +357,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM ...@@ -357,7 +357,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
inputs_dict, inputs_dict,
) = self.model_tester.prepare_config_and_inputs_for_common() ) = self.model_tester.prepare_config_and_inputs_for_common()
if not self.test_resize_embeddings: if not self.test_resize_embeddings:
return self.skipTest(reason="test_resize_embeddings is set to False")
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config = copy.deepcopy(original_config) config = copy.deepcopy(original_config)
...@@ -404,13 +404,13 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM ...@@ -404,13 +404,13 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
inputs_dict, inputs_dict,
) = self.model_tester.prepare_config_and_inputs_for_common() ) = self.model_tester.prepare_config_and_inputs_for_common()
if not self.test_resize_embeddings: if not self.test_resize_embeddings:
return self.skipTest(reason="test_resize_embeddings is set to False")
original_config.tie_word_embeddings = False original_config.tie_word_embeddings = False
# if model cannot untied embeddings -> leave test # if model cannot untied embeddings -> leave test
if original_config.tie_word_embeddings: if original_config.tie_word_embeddings:
return self.skipTest(reason="tie_word_embeddings is set to False")
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config = copy.deepcopy(original_config) config = copy.deepcopy(original_config)
...@@ -493,7 +493,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM ...@@ -493,7 +493,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
def _create_and_check_torchscript(self, config, inputs_dict): def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript: if not self.test_torchscript:
return self.skipTest(reason="test_torchscript is set to False")
configs_no_init = _config_zero_init(config) # To be sure we have no Nan configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True configs_no_init.torchscript = True
...@@ -573,7 +573,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM ...@@ -573,7 +573,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
self.assertTrue(models_equal) self.assertTrue(models_equal)
@unittest.skip("The model doesn't support left padding") # and it's not used enough to be worth fixing :) @unittest.skip(reason="The model doesn't support left padding") # and it's not used enough to be worth fixing :)
def test_left_padding_compatibility(self): def test_left_padding_compatibility(self):
pass pass
......
...@@ -278,17 +278,19 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase ...@@ -278,17 +278,19 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
check_hidden_states_output(inputs_dict, config, model_class) check_hidden_states_output(inputs_dict, config, model_class)
# Ignore since we have no tokens embeddings @unittest.skip(reason="Informer does not have tokens embeddings")
def test_resize_tokens_embeddings(self): def test_resize_tokens_embeddings(self):
pass pass
@unittest.skip
def test_model_outputs_equivalence(self): def test_model_outputs_equivalence(self):
pass pass
@unittest.skip
def test_determinism(self): def test_determinism(self):
pass pass
@unittest.skip("randomly selects U keys while calculating attentions") @unittest.skip(reason="randomly selects U keys while calculating attentions")
def test_batching_equivalence(self): def test_batching_equivalence(self):
pass pass
......
...@@ -390,7 +390,7 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi ...@@ -390,7 +390,7 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
Overriding the test_mismatched_shapes_have_properly_initialized_weights test because A_log and D params of the Overriding the test_mismatched_shapes_have_properly_initialized_weights test because A_log and D params of the
Mamba block are initialized differently and we tested that in test_initialization Mamba block are initialized differently and we tested that in test_initialization
""" """
self.skipTest("Cumbersome and redundant for Jamba") self.skipTest(reason="Cumbersome and redundant for Jamba")
def test_attention_outputs(self): def test_attention_outputs(self):
r""" r"""
...@@ -638,9 +638,9 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi ...@@ -638,9 +638,9 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
Overriding the test_flash_attn_2_inference_padding_right test as the Jamba model, like Mixtral, doesn't support Overriding the test_flash_attn_2_inference_padding_right test as the Jamba model, like Mixtral, doesn't support
right padding + use cache with FA2 right padding + use cache with FA2
""" """
self.skipTest("Jamba flash attention does not support right padding") self.skipTest(reason="Jamba flash attention does not support right padding")
@unittest.skip("Jamba has its own special cache type") @unittest.skip(reason="Jamba has its own special cache type")
@parameterized.expand([(1, False), (1, True), (4, False)]) @parameterized.expand([(1, False), (1, True), (4, False)])
def test_new_cache_format(self, num_beams, do_sample): def test_new_cache_format(self, num_beams, do_sample):
pass pass
......
...@@ -378,11 +378,11 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix ...@@ -378,11 +378,11 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels) result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels)) self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
@unittest.skip("JetMoe buffers include complex numbers, which breaks this test") @unittest.skip(reason="JetMoe buffers include complex numbers, which breaks this test")
def test_save_load_fast_init_from_base(self): def test_save_load_fast_init_from_base(self):
pass pass
@unittest.skip("JetMoe uses MoA on all models so the KV cache is a non standard format") @unittest.skip(reason="JetMoe uses MoA on all models so the KV cache is a non standard format")
def test_past_key_values_format(self): def test_past_key_values_format(self):
pass pass
...@@ -470,7 +470,7 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix ...@@ -470,7 +470,7 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
@pytest.mark.flash_attn_test @pytest.mark.flash_attn_test
@slow @slow
def test_flash_attn_2_inference_equivalence_right_padding(self): def test_flash_attn_2_inference_equivalence_right_padding(self):
self.skipTest("JetMoe flash attention does not support right padding") self.skipTest(reason="JetMoe flash attention does not support right padding")
@require_torch @require_torch
......
...@@ -375,7 +375,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) ...@@ -375,7 +375,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
# overwrite from common in order to use `config.text_config.vocab_size` instead of `config.vocab_size` # overwrite from common in order to use `config.text_config.vocab_size` instead of `config.vocab_size`
def test_tie_model_weights(self): def test_tie_model_weights(self):
if not self.test_torchscript: if not self.test_torchscript:
return self.skipTest(reason="test_torchscript is set to False")
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -429,7 +429,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase) ...@@ -429,7 +429,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
def _create_and_check_torchscript(self, config, inputs_dict): def _create_and_check_torchscript(self, config, inputs_dict):
if not self.test_torchscript: if not self.test_torchscript:
return self.skipTest(reason="test_torchscript is set to False")
configs_no_init = _config_zero_init(config) # To be sure we have no Nan configs_no_init = _config_zero_init(config) # To be sure we have no Nan
configs_no_init.torchscript = True configs_no_init.torchscript = True
......
@@ -69,6 +69,7 @@ class LayoutLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
        self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])

+    @unittest.skip
    def test_special_tokens_as_you_expect(self):
        """If you are training a seq2seq model that expects a decoder_prefix token make sure it is prepended to decoder_input_ids"""
        pass
...@@ -96,7 +96,7 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase) ...@@ -96,7 +96,7 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42) image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
self.assertEqual(image_processor.size, {"height": 42, "width": 42}) self.assertEqual(image_processor.size, {"height": 42, "width": 42})
@unittest.skip("Tesseract version is not correct in ci. @Arthur FIXME") @unittest.skip(reason="Tesseract version is not correct in ci. @Arthur FIXME")
def test_layoutlmv2_integration_test(self): def test_layoutlmv2_integration_test(self):
# with apply_OCR = True # with apply_OCR = True
image_processing = LayoutLMv2ImageProcessor() image_processing = LayoutLMv2ImageProcessor()
......
...@@ -414,7 +414,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa ...@@ -414,7 +414,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
check_hidden_states_output(inputs_dict, config, model_class) check_hidden_states_output(inputs_dict, config, model_class)
@unittest.skip("We cannot configure detectron2 to output a smaller backbone") @unittest.skip(reason="We cannot configure detectron2 to output a smaller backbone")
def test_model_is_small(self): def test_model_is_small(self):
pass pass
......
...@@ -195,7 +195,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -195,7 +195,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
tokenizer.tokenize(" \tHeLLo!how \n Are yoU? [UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"] tokenizer.tokenize(" \tHeLLo!how \n Are yoU? [UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"]
) )
@unittest.skip("Chat template tests don't play well with table/layout models.") @unittest.skip(reason="Chat template tests don't play well with table/layout models.")
def test_chat_template_batched(self): def test_chat_template_batched(self):
pass pass
...@@ -385,11 +385,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -385,11 +385,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens) decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
self.assertIn(decoded, [output, output.lower()]) self.assertIn(decoded, [output, output.lower()])
@unittest.skip("Not implemented") @unittest.skip(reason="Not implemented")
def test_right_and_left_truncation(self): def test_right_and_left_truncation(self):
pass pass
@unittest.skip("Not implemented") @unittest.skip(reason="Not implemented")
def test_split_special_tokens(self): def test_split_special_tokens(self):
pass pass
...@@ -814,7 +814,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -814,7 +814,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding_warning_message_fast_tokenizer(self): def test_padding_warning_message_fast_tokenizer(self):
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
words, boxes = self.get_words_and_boxes_batch() words, boxes = self.get_words_and_boxes_batch()
...@@ -835,7 +835,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -835,7 +835,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
) )
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
return self.skipTest(reason="test_slow_tokenizer is set to False")
tokenizer_slow = self.get_tokenizer() tokenizer_slow = self.get_tokenizer()
...@@ -942,7 +942,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -942,7 +942,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
encoded_sequences_batch_padded_2[key], encoded_sequences_batch_padded_2[key],
) )
@unittest.skip("batch_encode_plus does not handle overflowing tokens.") @unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
def test_batch_encode_plus_overflowing_tokens(self): def test_batch_encode_plus_overflowing_tokens(self):
pass pass
...@@ -1003,7 +1003,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1003,7 +1003,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
for tokenizer in tokenizers: for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"): with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.pad_token is None: if tokenizer.pad_token is None:
self.skipTest("No padding token.") self.skipTest(reason="No padding token.")
else: else:
words, boxes = self.get_words_and_boxes() words, boxes = self.get_words_and_boxes()
...@@ -1046,7 +1046,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1046,7 +1046,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_build_inputs_with_special_tokens(self): def test_build_inputs_with_special_tokens(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1290,13 +1290,13 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1290,13 +1290,13 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
for tokenizer in tokenizers: for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"): with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING: if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
return self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__] config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
config = config_class() config = config_class()
if config.is_encoder_decoder or config.pad_token_id is None: if config.is_encoder_decoder or config.pad_token_id is None:
return self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
model = model_class(config) model = model_class(config)
...@@ -1327,11 +1327,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1327,11 +1327,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_rust_and_python_full_tokenizers(self): def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
tokenizer = self.get_tokenizer() tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer() rust_tokenizer = self.get_rust_tokenizer()
...@@ -1349,7 +1349,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1349,7 +1349,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_tokenization_python_rust_equals(self): def test_tokenization_python_rust_equals(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1403,7 +1403,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1403,7 +1403,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_embeded_special_tokens(self): def test_embeded_special_tokens(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1593,7 +1593,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1593,7 +1593,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_training_new_tokenizer(self): def test_training_new_tokenizer(self):
# This feature only exists for fast tokenizers # This feature only exists for fast tokenizers
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
tokenizer = self.get_rust_tokenizer() tokenizer = self.get_rust_tokenizer()
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100) new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
...@@ -1630,7 +1630,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1630,7 +1630,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_training_new_tokenizer_with_special_tokens_change(self): def test_training_new_tokenizer_with_special_tokens_change(self):
# This feature only exists for fast tokenizers # This feature only exists for fast tokenizers
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
tokenizer = self.get_rust_tokenizer() tokenizer = self.get_rust_tokenizer()
# Test with a special tokens map # Test with a special tokens map
...@@ -1743,7 +1743,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1743,7 +1743,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding_different_model_input_name(self): def test_padding_different_model_input_name(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1837,7 +1837,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1837,7 +1837,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(len(tokens[key].shape), 3) self.assertEqual(len(tokens[key].shape), 3)
self.assertEqual(tokens[key].shape[-1], 4) self.assertEqual(tokens[key].shape[-1], 4)
@unittest.skip("TO DO: overwrite this very extensive test.") @unittest.skip(reason="TO DO: overwrite this very extensive test.")
def test_alignement_methods(self): def test_alignement_methods(self):
pass pass
...@@ -1875,7 +1875,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1875,7 +1875,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
return words, boxes, output_ids return words, boxes, output_ids
# @unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.") # @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
def test_maximum_encoding_length_pair_input(self): def test_maximum_encoding_length_pair_input(self):
tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100) tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
for tokenizer in tokenizers: for tokenizer in tokenizers:
...@@ -2237,7 +2237,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -2237,7 +2237,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(bbox, bbox_second_sequence) self.assertEqual(bbox, bbox_second_sequence)
self.assertEqual(overflowing_bbox, overflowing_token_bbox_second_sequence_slow) self.assertEqual(overflowing_bbox, overflowing_token_bbox_second_sequence_slow)
# @unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.") # @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
def test_maximum_encoding_length_single_input(self): def test_maximum_encoding_length_single_input(self):
tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100) tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
for tokenizer in tokenizers: for tokenizer in tokenizers:
...@@ -2359,15 +2359,15 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -2359,15 +2359,15 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(bbox, sequence["bbox"][:-2]) self.assertEqual(bbox, sequence["bbox"][:-2])
self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :]) self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
@unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.") @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
def test_pretokenized_inputs(self): def test_pretokenized_inputs(self):
pass pass
@unittest.skip("LayoutLMv2 tokenizer always expects pretokenized inputs.") @unittest.skip(reason="LayoutLMv2 tokenizer always expects pretokenized inputs.")
def test_compare_pretokenized_inputs(self): def test_compare_pretokenized_inputs(self):
pass pass
@unittest.skip("LayoutLMv2 fast tokenizer does not support prepare_for_model") @unittest.skip(reason="LayoutLMv2 fast tokenizer does not support prepare_for_model")
def test_compare_prepare_for_model(self): def test_compare_prepare_for_model(self):
pass pass
...@@ -2476,10 +2476,10 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -2476,10 +2476,10 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertDictEqual(dict(encoding_p), expected_results) self.assertDictEqual(dict(encoding_p), expected_results)
self.assertDictEqual(dict(encoding_r), expected_results) self.assertDictEqual(dict(encoding_r), expected_results)
@unittest.skip("Doesn't support another framework than PyTorch") @unittest.skip(reason="Doesn't support another framework than PyTorch")
def test_np_encode_plus_sent_to_model(self): def test_np_encode_plus_sent_to_model(self):
pass pass
@unittest.skip("Chat is not supported") @unittest.skip(reason="Chat is not supported")
def test_chat_template(self): def test_chat_template(self):
pass pass
...@@ -140,7 +140,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -140,7 +140,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
output_text = "lower newer" output_text = "lower newer"
return input_text, output_text return input_text, output_text
@unittest.skip("Chat template tests don't play well with table/layout models.") @unittest.skip(reason="Chat template tests don't play well with table/layout models.")
def test_chat_template_batched(self): def test_chat_template_batched(self):
pass pass
...@@ -265,11 +265,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -265,11 +265,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens) decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
self.assertIn(decoded, [output, output.lower()]) self.assertIn(decoded, [output, output.lower()])
@unittest.skip("Not implemented") @unittest.skip(reason="Not implemented")
def test_right_and_left_truncation(self): def test_right_and_left_truncation(self):
pass pass
@unittest.skip("Not implemented") @unittest.skip(reason="Not implemented")
def test_split_special_tokens(self): def test_split_special_tokens(self):
pass pass
...@@ -694,7 +694,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -694,7 +694,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding_warning_message_fast_tokenizer(self): def test_padding_warning_message_fast_tokenizer(self):
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
words, boxes = self.get_words_and_boxes_batch() words, boxes = self.get_words_and_boxes_batch()
...@@ -715,7 +715,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -715,7 +715,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
) )
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
tokenizer_slow = self.get_tokenizer() tokenizer_slow = self.get_tokenizer()
...@@ -822,7 +822,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -822,7 +822,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
encoded_sequences_batch_padded_2[key], encoded_sequences_batch_padded_2[key],
) )
@unittest.skip("batch_encode_plus does not handle overflowing tokens.") @unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
def test_batch_encode_plus_overflowing_tokens(self): def test_batch_encode_plus_overflowing_tokens(self):
pass pass
...@@ -883,7 +883,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -883,7 +883,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
for tokenizer in tokenizers: for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"): with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.pad_token is None: if tokenizer.pad_token is None:
self.skipTest("No padding token.") self.skipTest(reason="No padding token.")
else: else:
words, boxes = self.get_words_and_boxes() words, boxes = self.get_words_and_boxes()
...@@ -926,7 +926,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -926,7 +926,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_build_inputs_with_special_tokens(self): def test_build_inputs_with_special_tokens(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_rust_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1168,13 +1168,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1168,13 +1168,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
for tokenizer in tokenizers: for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"): with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING: if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
return self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__] config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
config = config_class() config = config_class()
if config.is_encoder_decoder or config.pad_token_id is None: if config.is_encoder_decoder or config.pad_token_id is None:
return self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
model = model_class(config) model = model_class(config)
...@@ -1205,11 +1205,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1205,11 +1205,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_rust_and_python_full_tokenizers(self): def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
tokenizer = self.get_tokenizer() tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer() rust_tokenizer = self.get_rust_tokenizer()
...@@ -1227,7 +1227,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1227,7 +1227,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_tokenization_python_rust_equals(self): def test_tokenization_python_rust_equals(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1281,7 +1281,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1281,7 +1281,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_embeded_special_tokens(self): def test_embeded_special_tokens(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1471,7 +1471,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1471,7 +1471,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_training_new_tokenizer(self): def test_training_new_tokenizer(self):
# This feature only exists for fast tokenizers # This feature only exists for fast tokenizers
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
tokenizer = self.get_rust_tokenizer() tokenizer = self.get_rust_tokenizer()
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100) new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
...@@ -1508,7 +1508,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1508,7 +1508,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_training_new_tokenizer_with_special_tokens_change(self): def test_training_new_tokenizer_with_special_tokens_change(self):
# This feature only exists for fast tokenizers # This feature only exists for fast tokenizers
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
tokenizer = self.get_rust_tokenizer() tokenizer = self.get_rust_tokenizer()
# Test with a special tokens map # Test with a special tokens map
...@@ -1621,7 +1621,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1621,7 +1621,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding_different_model_input_name(self): def test_padding_different_model_input_name(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1720,7 +1720,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1720,7 +1720,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertEqual(len(tokens[key].shape), 3) self.assertEqual(len(tokens[key].shape), 3)
self.assertEqual(tokens[key].shape[-1], 4) self.assertEqual(tokens[key].shape[-1], 4)
@unittest.skip("TO DO: overwrite this very extensive test.") @unittest.skip(reason="TO DO: overwrite this very extensive test.")
def test_alignement_methods(self): def test_alignement_methods(self):
pass pass
...@@ -2272,15 +2272,15 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -2272,15 +2272,15 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
# self.assertEqual(bbox, sequence["bbox"][:-2]) # self.assertEqual(bbox, sequence["bbox"][:-2])
# self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :]) # self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
@unittest.skip("LayoutLMv3 tokenizer requires boxes besides sequences.") @unittest.skip(reason="LayoutLMv3 tokenizer requires boxes besides sequences.")
def test_pretokenized_inputs(self): def test_pretokenized_inputs(self):
pass pass
@unittest.skip("LayoutLMv3 tokenizer always expects pretokenized inputs.") @unittest.skip(reason="LayoutLMv3 tokenizer always expects pretokenized inputs.")
def test_compare_pretokenized_inputs(self): def test_compare_pretokenized_inputs(self):
pass pass
@unittest.skip("LayoutLMv3 fast tokenizer does not support prepare_for_model") @unittest.skip(reason="LayoutLMv3 fast tokenizer does not support prepare_for_model")
def test_compare_prepare_for_model(self): def test_compare_prepare_for_model(self):
pass pass
...@@ -2393,7 +2393,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -2393,7 +2393,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertDictEqual(dict(encoding_p), expected_results) self.assertDictEqual(dict(encoding_p), expected_results)
self.assertDictEqual(dict(encoding_r), expected_results) self.assertDictEqual(dict(encoding_r), expected_results)
@unittest.skip("Doesn't support another framework than PyTorch") @unittest.skip(reason="Doesn't support another framework than PyTorch")
def test_np_encode_plus_sent_to_model(self): def test_np_encode_plus_sent_to_model(self):
pass pass
...@@ -2408,13 +2408,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -2408,13 +2408,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
for tokenizer in tokenizers: for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"): with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING: if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
return self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__] config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
config = config_class() config = config_class()
if config.is_encoder_decoder or config.pad_token_id is None: if config.is_encoder_decoder or config.pad_token_id is None:
return self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
model = model_class(config) model = model_class(config)
...@@ -2433,6 +2433,6 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -2433,6 +2433,6 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
model(encoded_sequence) model(encoded_sequence)
model(batch_encoded_sequence) model(batch_encoded_sequence)
@unittest.skip("Chat is not supported") @unittest.skip(reason="Chat is not supported")
def test_chat_template(self): def test_chat_template(self):
pass pass
...@@ -107,7 +107,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -107,7 +107,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
output_text = "unwanted, running" output_text = "unwanted, running"
return input_text, output_text return input_text, output_text
@unittest.skip("Chat template tests don't play well with table/layout models.") @unittest.skip(reason="Chat template tests don't play well with table/layout models.")
def test_chat_template_batched(self): def test_chat_template_batched(self):
pass pass
...@@ -115,7 +115,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -115,7 +115,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
# this tokenizer # this tokenizer
def test_save_sentencepiece_tokenizer(self) -> None: def test_save_sentencepiece_tokenizer(self) -> None:
if not self.test_sentencepiece or not self.test_slow_tokenizer: if not self.test_sentencepiece or not self.test_slow_tokenizer:
return self.skipTest(reason="test_sentencepiece or test_slow_tokenizer is set to False")
# We want to verify that we will be able to save the tokenizer even if the original files that were used to # We want to verify that we will be able to save the tokenizer even if the original files that were used to
# build the tokenizer have been deleted in the meantime. # build the tokenizer have been deleted in the meantime.
words, boxes = self.get_words_and_boxes() words, boxes = self.get_words_and_boxes()
...@@ -745,7 +745,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -745,7 +745,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding_warning_message_fast_tokenizer(self): def test_padding_warning_message_fast_tokenizer(self):
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
words, boxes = self.get_words_and_boxes_batch() words, boxes = self.get_words_and_boxes_batch()
...@@ -766,7 +766,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -766,7 +766,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
) )
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
return self.skipTest(reason="test_slow_tokenizer is set to False")
tokenizer_slow = self.get_tokenizer() tokenizer_slow = self.get_tokenizer()
...@@ -873,7 +873,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -873,7 +873,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
encoded_sequences_batch_padded_2[key], encoded_sequences_batch_padded_2[key],
) )
@unittest.skip("batch_encode_plus does not handle overflowing tokens.") @unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
def test_batch_encode_plus_overflowing_tokens(self): def test_batch_encode_plus_overflowing_tokens(self):
pass pass
...@@ -934,7 +934,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -934,7 +934,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
for tokenizer in tokenizers: for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"): with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.pad_token is None: if tokenizer.pad_token is None:
self.skipTest("No padding token.") self.skipTest(reason="No padding token.")
else: else:
words, boxes = self.get_words_and_boxes() words, boxes = self.get_words_and_boxes()
...@@ -977,7 +977,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -977,7 +977,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_build_inputs_with_special_tokens(self): def test_build_inputs_with_special_tokens(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1066,7 +1066,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1066,7 +1066,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
shutil.rmtree(tmpdirname) shutil.rmtree(tmpdirname)
@unittest.skip("Not implemented") @unittest.skip(reason="Not implemented")
def test_right_and_left_truncation(self): def test_right_and_left_truncation(self):
pass pass
...@@ -1224,13 +1224,13 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1224,13 +1224,13 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
for tokenizer in tokenizers: for tokenizer in tokenizers:
with self.subTest(f"{tokenizer.__class__.__name__}"): with self.subTest(f"{tokenizer.__class__.__name__}"):
if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING: if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
return self.skipTest(reason=f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__] config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
config = config_class() config = config_class()
if config.is_encoder_decoder or config.pad_token_id is None: if config.is_encoder_decoder or config.pad_token_id is None:
return self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
model = model_class(config) model = model_class(config)
...@@ -1256,11 +1256,11 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1256,11 +1256,11 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_rust_and_python_full_tokenizers(self): def test_rust_and_python_full_tokenizers(self):
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
tokenizer = self.get_tokenizer() tokenizer = self.get_tokenizer()
rust_tokenizer = self.get_rust_tokenizer() rust_tokenizer = self.get_rust_tokenizer()
...@@ -1278,7 +1278,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1278,7 +1278,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_tokenization_python_rust_equals(self): def test_tokenization_python_rust_equals(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1332,7 +1332,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1332,7 +1332,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_embeded_special_tokens(self): def test_embeded_special_tokens(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1522,7 +1522,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1522,7 +1522,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_training_new_tokenizer(self): def test_training_new_tokenizer(self):
# This feature only exists for fast tokenizers # This feature only exists for fast tokenizers
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
tokenizer = self.get_rust_tokenizer() tokenizer = self.get_rust_tokenizer()
new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100) new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
...@@ -1559,7 +1559,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1559,7 +1559,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_training_new_tokenizer_with_special_tokens_change(self): def test_training_new_tokenizer_with_special_tokens_change(self):
# This feature only exists for fast tokenizers # This feature only exists for fast tokenizers
if not self.test_rust_tokenizer: if not self.test_rust_tokenizer:
return self.skipTest(reason="test_rust_tokenizer is set to False")
tokenizer = self.get_rust_tokenizer() tokenizer = self.get_rust_tokenizer()
# Test with a special tokens map # Test with a special tokens map
...@@ -1672,7 +1672,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1672,7 +1672,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_padding_different_model_input_name(self): def test_padding_different_model_input_name(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"): with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
...@@ -1770,7 +1770,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1770,7 +1770,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
def test_save_pretrained(self): def test_save_pretrained(self):
if not self.test_slow_tokenizer: if not self.test_slow_tokenizer:
# as we don't have a slow version, we can't compare the outputs between slow and fast versions # as we don't have a slow version, we can't compare the outputs between slow and fast versions
return self.skipTest(reason="test_slow_tokenizer is set to False")
self.tokenizers_list[0] = (self.rust_tokenizer_class, "hf-internal-testing/tiny-random-layoutxlm", {}) self.tokenizers_list[0] = (self.rust_tokenizer_class, "hf-internal-testing/tiny-random-layoutxlm", {})
for tokenizer, pretrained_name, kwargs in self.tokenizers_list: for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
...@@ -1838,27 +1838,27 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1838,27 +1838,27 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
shutil.rmtree(tmpdirname2) shutil.rmtree(tmpdirname2)
@unittest.skip("TO DO: overwrite this very extensive test.") @unittest.skip(reason="TO DO: overwrite this very extensive test.")
def test_alignement_methods(self): def test_alignement_methods(self):
pass pass
@unittest.skip("layoutxlm tokenizer requires boxes besides sequences.") @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
def test_maximum_encoding_length_pair_input(self): def test_maximum_encoding_length_pair_input(self):
pass pass
@unittest.skip("layoutxlm tokenizer requires boxes besides sequences.") @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
def test_maximum_encoding_length_single_input(self): def test_maximum_encoding_length_single_input(self):
pass pass
@unittest.skip("layoutxlm tokenizer requires boxes besides sequences.") @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
def test_pretokenized_inputs(self): def test_pretokenized_inputs(self):
pass pass
@unittest.skip("layoutxlm tokenizer always expects pretokenized inputs.") @unittest.skip(reason="layoutxlm tokenizer always expects pretokenized inputs.")
def test_compare_pretokenized_inputs(self): def test_compare_pretokenized_inputs(self):
pass pass
@unittest.skip("layoutxlm fast tokenizer does not support prepare_for_model") @unittest.skip(reason="layoutxlm fast tokenizer does not support prepare_for_model")
def test_compare_prepare_for_model(self): def test_compare_prepare_for_model(self):
pass pass
...@@ -1962,18 +1962,18 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase): ...@@ -1962,18 +1962,18 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
self.assertDictEqual(dict(encoding_p), expected_results) self.assertDictEqual(dict(encoding_p), expected_results)
self.assertDictEqual(dict(encoding_r), expected_results) self.assertDictEqual(dict(encoding_r), expected_results)
@unittest.skip("Doesn't support another framework than PyTorch") @unittest.skip(reason="Doesn't support another framework than PyTorch")
def test_np_encode_plus_sent_to_model(self): def test_np_encode_plus_sent_to_model(self):
pass pass
@unittest.skip("Doesn't use SentencePiece") @unittest.skip(reason="Doesn't use SentencePiece")
def test_sentencepiece_tokenize_and_convert_tokens_to_string(self): def test_sentencepiece_tokenize_and_convert_tokens_to_string(self):
pass pass
@unittest.skip("Doesn't use SentencePiece") @unittest.skip(reason="Doesn't use SentencePiece")
def test_sentencepiece_tokenize_and_decode(self): def test_sentencepiece_tokenize_and_decode(self):
pass pass
@unittest.skip("Chat is not supported") @unittest.skip(reason="Chat is not supported")
def test_chat_template(self): def test_chat_template(self):
pass pass
...@@ -378,8 +378,8 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, ...@@ -378,8 +378,8 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
model.generate(input_ids, attention_mask=attention_mask) model.generate(input_ids, attention_mask=attention_mask)
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3) model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
@unittest.skip(reason="Longformer cannot keep gradients in attentions or hidden states")
def test_retain_grad_hidden_states_attentions(self): def test_retain_grad_hidden_states_attentions(self):
# longformer cannot keep gradients in attentions or hidden states
return return
def test_attention_outputs(self): def test_attention_outputs(self):
......
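Taken together, the diff replaces two patterns: early `return`s inside guard clauses become `self.skipTest(reason=...)`, and empty overridden tests gain `@unittest.skip(reason=...)`. A hedged sketch of the difference the first change makes (the flag name mirrors the tester-mixin attributes above, but the class is hypothetical):

```python
import unittest


class GuardStyleSketch(unittest.TestCase):
    test_rust_tokenizer = False  # hypothetical flag, as in the tester mixins

    def test_old_style(self):
        if not self.test_rust_tokenizer:
            return  # recorded as PASSED even though nothing was verified

    def test_new_style(self):
        if not self.test_rust_tokenizer:
            self.skipTest(reason="test_rust_tokenizer is set to False")
        # real assertions would follow here
```

With the new style, CI summaries distinguish genuinely exercised tests from configurations that were never run at all.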