Skip tests properly (#31308)

* Skip tests properly * [test_all] * Add 'reason' as kwarg for skipTest * [test_all] Fix up * [test_all]

Skip tests properly (#31308)
* Skip tests properly * [test_all] * Add 'reason' as kwarg for skipTest * [test_all] Fix up * [test_all]
1de7dc74 · amyeroberts · GitHub · 1f9f57ab · 1de7dc74 · 1de7dc74
Unverified Commit 1de7dc74 authored Jun 26, 2024 by amyeroberts Committed by GitHub Jun 26, 2024
20 changed files
--- a/tests/models/led/test_tokenization_led.py
+++ b/tests/models/led/test_tokenization_led.py
@@ -154,6 +154,7 @@ class TestTokenizationLED(TokenizerTesterMixin, unittest.TestCase):
            outputs = tokenizer.pad(encoded_output)
            self.assertSequenceEqual(outputs["global_attention_mask"], expected_global_attention_mask)
+    @unittest.skip
    def test_pretokenized_inputs(self):
        pass

--- a/tests/models/levit/test_modeling_levit.py
+++ b/tests/models/levit/test_modeling_levit.py
@@ -281,7 +281,7 @@ class LevitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    # special case for LevitForImageClassificationWithTeacher model
    def test_training(self):
        if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True
@@ -303,7 +303,7 @@ class LevitModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    def test_training_gradient_checkpointing(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")
        config.use_cache = False
        config.return_dict = True

--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -393,7 +393,7 @@ class LlamaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
            (self.model_tester.batch_size, self.model_tester.seq_length, self.model_tester.num_labels),
        )
-    @unittest.skip("Llama buffers include complex numbers, which breaks this test")
+    @unittest.skip(reason="Llama buffers include complex numbers, which breaks this test")
    def test_save_load_fast_init_from_base(self):
        pass
@@ -710,7 +710,7 @@ class LlamaIntegrationTest(unittest.TestCase):
        # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
        # work as intended. See https://github.com/pytorch/pytorch/issues/121943
        if version.parse(torch.__version__) < version.parse("2.3.0"):
-            self.skipTest("This test requires torch >= 2.3 to run.")
+            self.skipTest(reason="This test requires torch >= 2.3 to run.")
        NUM_TOKENS_TO_GENERATE = 40
        # Note on `EXPECTED_TEXT_COMPLETION`'s diff: the current value matches the original test if the original test

--- a/tests/models/llama/test_tokenization_llama.py
+++ b/tests/models/llama/test_tokenization_llama.py
@@ -26,7 +26,6 @@ from transformers import (
    AddedToken,
    LlamaTokenizer,
    LlamaTokenizerFast,
-    is_torch_available,
 )
 from transformers.convert_slow_tokenizer import convert_slow_tokenizer
 from transformers.testing_utils import (
@@ -45,10 +44,6 @@ from ...test_tokenization_common import TokenizerTesterMixin
 SAMPLE_VOCAB = get_tests_dir("fixtures/test_sentencepiece.model")
-if is_torch_available():
-    pass
 @require_sentencepiece
 @require_tokenizers
 class LlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@@ -144,7 +139,7 @@ class LlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
            ],
        )
-    @unittest.skip("Let's wait for the fast tokenizer!")
+    @unittest.skip(reason="Let's wait for the fast tokenizer!")
    def test_save_pretrained(self):
        self.tokenizers_list += (self.rust_tokenizer_class, "hf-internal-testing/llama-tokenizer", {})
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
@@ -213,7 +208,7 @@ class LlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    @require_torch
    def test_batch_tokenization(self):
        if not self.test_seq2seq:
-            return
+            self.skipTest(reason="test_seq2seq is set to False")
        tokenizers = self.get_tokenizers()
        for tokenizer in tokenizers:
@@ -233,7 +228,7 @@ class LlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                        return_tensors="pt",
                    )
                except NotImplementedError:
-                    return
+                    self.skipTest(reason="Encountered NotImplementedError when calling tokenizer")
                self.assertEqual(batch.input_ids.shape[1], 3)
                # max_target_length will default to max_length if not specified
                batch = tokenizer(text, max_length=3, return_tensors="pt")
@@ -244,7 +239,7 @@ class LlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                self.assertEqual(batch_encoder_only.attention_mask.shape[1], 3)
                self.assertNotIn("decoder_input_ids", batch_encoder_only)
-    @unittest.skip("Unfortunately way too slow to build a BPE with SentencePiece.")
+    @unittest.skip(reason="Unfortunately way too slow to build a BPE with SentencePiece.")
    def test_save_slow_from_fast_and_reload_fast(self):
        pass
@@ -299,11 +294,11 @@ class LlamaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
            pickled_tokenizer = pickle.dumps(tokenizer)
        pickle.loads(pickled_tokenizer)
-    @unittest.skip("worker 'gw4' crashed on CI, passing locally.")
+    @unittest.skip(reason="worker 'gw4' crashed on CI, passing locally.")
    def test_pickle_subword_regularization_tokenizer(self):
        pass
-    @unittest.skip("worker 'gw4' crashed on CI, passing locally.")
+    @unittest.skip(reason="worker 'gw4' crashed on CI, passing locally.")
    def test_subword_regularization_tokenizer(self):
        pass

--- a/tests/models/llava_next/test_image_processing_llava_next.py
+++ b/tests/models/llava_next/test_image_processing_llava_next.py
@@ -197,7 +197,9 @@ class LlavaNextImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
        expected_output_image_shape = (7, 1445, 3, 18, 18)
        self.assertEqual(tuple(encoded_images.shape), expected_output_image_shape)
-    @unittest.skip("LlavaNextImageProcessor doesn't treat 4 channel PIL and numpy consistently yet")  # FIXME Amy
+    @unittest.skip(
+        reason="LlavaNextImageProcessor doesn't treat 4 channel PIL and numpy consistently yet"
+    )  # FIXME Amy
    def test_call_numpy_4_channels(self):
        pass

--- a/tests/models/longformer/test_modeling_longformer.py
+++ b/tests/models/longformer/test_modeling_longformer.py
@@ -384,11 +384,11 @@ class LongformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
+    @unittest.skip(reason="Longformer cannot keep gradients in attention or hidden states")
    def test_retain_grad_hidden_states_attentions(self):
-        # longformer cannot keep gradients in attentions or hidden states
        return
-    @unittest.skip("LongFormer calculates global attn only when attn_mask has non-zero elements")
+    @unittest.skip(reason="LongFormer calculates global attn only when attn_mask has non-zero elements")
    def test_batching_equivalence(self):
        return

--- a/tests/models/longformer/test_tokenization_longformer.py
+++ b/tests/models/longformer/test_tokenization_longformer.py
@@ -166,6 +166,7 @@ class LongformerTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        first_char = tokenizer.convert_ids_to_tokens(encoded[mask_loc + 1])[0]
        self.assertNotEqual(first_char, space_encoding)
+    @unittest.skip
    def test_pretokenized_inputs(self):
        pass

--- a/tests/models/longt5/test_modeling_longt5.py
+++ b/tests/models/longt5/test_modeling_longt5.py
@@ -642,7 +642,7 @@ class LongT5ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
    def test_attention_outputs(self):
        if not self.has_attentions:
-            pass
+            self.skipTest(reason="has_attentions is set to False")
        else:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -769,7 +769,7 @@ class LongT5TGlobalModelTest(LongT5ModelTest):
    def test_attention_outputs(self):
        if not self.has_attentions:
-            pass
+            self.skipTest(reason="has_attentions is set to False")
        else:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1035,7 +1035,7 @@ class LongT5EncoderOnlyModelTest(ModelTesterMixin, unittest.TestCase):
    def test_attention_outputs(self):
        if not self.has_attentions:
-            pass
+            self.skipTest(reason="has_attentions is set to False")
        else:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -1107,7 +1107,7 @@ class LongT5EncoderOnlyTGlobalModelTest(LongT5EncoderOnlyModelTest):
    def test_attention_outputs(self):
        if not self.has_attentions:
-            pass
+            self.skipTest(reason="has_attentions is set to False")
        else:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

--- a/tests/models/luke/test_tokenization_luke.py
+++ b/tests/models/luke/test_tokenization_luke.py
@@ -130,6 +130,7 @@ class LukeTokenizerTest(TokenizerTesterMixin, unittest.TestCase):
        first_char = tokenizer.convert_ids_to_tokens(encoded[mask_loc + 1])[0]
        self.assertNotEqual(first_char, space_encoding)
+    @unittest.skip
    def test_pretokenized_inputs(self):
        pass

--- a/tests/models/lxmert/test_modeling_lxmert.py
+++ b/tests/models/lxmert/test_modeling_lxmert.py
@@ -766,15 +766,15 @@ class LxmertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
        return tf_inputs_dict
-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
    def test_save_load_low_cpu_mem_usage(self):
        pass
-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
    def test_save_load_low_cpu_mem_usage_checkpoints(self):
        pass
-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
    def test_save_load_low_cpu_mem_usage_no_safetensors(self):
        pass

--- a/tests/models/lxmert/test_tokenization_lxmert.py
+++ b/tests/models/lxmert/test_tokenization_lxmert.py
@@ -68,7 +68,7 @@ class LxmertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_rust_and_python_full_tokenizers(self):
        if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
        tokenizer = self.get_tokenizer()
        rust_tokenizer = self.get_rust_tokenizer()

--- a/tests/models/marian/test_modeling_marian.py
+++ b/tests/models/marian/test_modeling_marian.py
@@ -346,6 +346,7 @@ class MarianModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
        model.resize_decoder_token_embeddings(config.vocab_size + 1)
        self.assertEqual(model.lm_head.weight.shape, (config.vocab_size + 1, config.d_model))
+    @unittest.skip
    def test_tie_word_embeddings_decoder(self):
        pass
@@ -367,15 +368,15 @@ class MarianModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
    def test_training_gradient_checkpointing_use_reentrant_false(self):
        pass
-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
    def test_save_load_low_cpu_mem_usage(self):
        pass
-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
    def test_save_load_low_cpu_mem_usage_checkpoints(self):
        pass
-    @unittest.skip("No support for low_cpu_mem_usage=True.")
+    @unittest.skip(reason="No support for low_cpu_mem_usage=True.")
    def test_save_load_low_cpu_mem_usage_no_safetensors(self):
        pass
@@ -899,6 +900,6 @@ class MarianStandaloneDecoderModelTest(ModelTesterMixin, GenerationTesterMixin,
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)
+    @unittest.skip(reason="Decoder cannot keep gradients")
    def test_retain_grad_hidden_states_attentions(self):
-        # decoder cannot keep gradients
        return
--- a/tests/models/markuplm/test_tokenization_markuplm.py
+++ b/tests/models/markuplm/test_tokenization_markuplm.py
@@ -101,7 +101,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        return questions, nodes, xpaths
-    @unittest.skip("Chat template tests don't play well with table/layout models.")
+    @unittest.skip(reason="Chat template tests don't play well with table/layout models.")
    def test_chat_template_batched(self):
        pass
@@ -207,7 +207,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
                self.assertIn(decoded, [output, output.lower()])
-    @unittest.skip("Not implemented")
+    @unittest.skip(reason="Not implemented")
    def test_right_and_left_truncation(self):
        pass
@@ -708,7 +708,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                        encoded_sequences_batch_padded_2[key],
                    )
-    @unittest.skip("batch_encode_plus does not handle overflowing tokens.")
+    @unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
    def test_batch_encode_plus_overflowing_tokens(self):
        pass
@@ -769,7 +769,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        for tokenizer in tokenizers:
            with self.subTest(f"{tokenizer.__class__.__name__}"):
                if tokenizer.pad_token is None:
-                    self.skipTest("No padding token.")
+                    self.skipTest(reason="No padding token.")
                else:
                    nodes, xpaths = self.get_nodes_and_xpaths()
@@ -814,7 +814,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_build_inputs_with_special_tokens(self):
        if not self.test_slow_tokenizer:
            # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1056,13 +1056,13 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        for tokenizer in tokenizers:
            with self.subTest(f"{tokenizer.__class__.__name__}"):
                if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
-                    return
+                    self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
                config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
                config = config_class()
                if config.is_encoder_decoder or config.pad_token_id is None:
-                    return
+                    self.skipTest(reason="Model is an encoder-decoder or does not have a pad token set")
                model = model_class(config)
@@ -1088,11 +1088,11 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_rust_and_python_full_tokenizers(self):
        if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
        if not self.test_slow_tokenizer:
            # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        tokenizer = self.get_tokenizer()
        rust_tokenizer = self.get_rust_tokenizer()
@@ -1110,7 +1110,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_tokenization_python_rust_equals(self):
        if not self.test_slow_tokenizer:
            # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1172,7 +1172,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_embeded_special_tokens(self):
        if not self.test_slow_tokenizer:
            # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1367,7 +1367,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_training_new_tokenizer(self):
        # This feature only exists for fast tokenizers
        if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
        tokenizer = self.get_rust_tokenizer()
        new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
@@ -1406,7 +1406,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_training_new_tokenizer_with_special_tokens_change(self):
        # This feature only exists for fast tokenizers
        if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
        tokenizer = self.get_rust_tokenizer()
        # Test with a special tokens map
@@ -1519,7 +1519,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_padding_different_model_input_name(self):
        if not self.test_slow_tokenizer:
            # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
            with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1613,7 +1613,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                        self.assertEqual(len(tokens[key].shape), 3)
                        self.assertEqual(tokens[key].shape[-2], 6)
-    @unittest.skip("TO DO: overwrite this very extensive test.")
+    @unittest.skip(reason="TO DO: overwrite this very extensive test.")
    def test_alignement_methods(self):
        pass
@@ -2152,15 +2152,15 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                    self.assertEqual(xpath_tags_seq, sequence["xpath_tags_seq"][:-2])
                    self.assertEqual(overflowing_xpath_tags_seq, sequence["xpath_tags_seq"][-(2 + stride) :])
-    @unittest.skip("MarkupLM tokenizer requires xpaths besides sequences.")
+    @unittest.skip(reason="MarkupLM tokenizer requires xpaths besides sequences.")
    def test_pretokenized_inputs(self):
        pass
-    @unittest.skip("MarkupLM tokenizer always expects pretokenized inputs.")
+    @unittest.skip(reason="MarkupLM tokenizer always expects pretokenized inputs.")
    def test_compare_pretokenized_inputs(self):
        pass
-    @unittest.skip("MarkupLM fast tokenizer does not support prepare_for_model")
+    @unittest.skip(reason="MarkupLM fast tokenizer does not support prepare_for_model")
    def test_compare_prepare_for_model(self):
        pass
@@ -2264,13 +2264,13 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        self.assertDictEqual(dict(encoding_p), expected_results)
        self.assertDictEqual(dict(encoding_r), expected_results)
-    @unittest.skip("Doesn't support another framework than PyTorch")
+    @unittest.skip(reason="Doesn't support another framework than PyTorch")
    def test_np_encode_plus_sent_to_model(self):
        pass
    def test_padding_warning_message_fast_tokenizer(self):
        if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
        nodes, xpaths = self.get_nodes_and_xpaths()
@@ -2290,7 +2290,7 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
        )
        if not self.test_slow_tokenizer:
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        tokenizer_slow = self.get_tokenizer()
        # check correct behaviour if no pad_token_id exists and add it eventually
@@ -2309,10 +2309,10 @@ class MarkupLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
            cm.records[0].message,
        )
-    @unittest.skip("Chat is not supported")
+    @unittest.skip(reason="Chat is not supported")
    def test_chat_template(self):
        pass
-    @unittest.skip("The model tested fails `Hub -> Fast == Hub -> Slow`, nothing much we can do")
+    @unittest.skip(reason="The model tested fails `Hub -> Fast == Hub -> Slow`, nothing much we can do")
    def test_added_tokens_serialization(self):
        pass
--- a/tests/models/mask2former/test_modeling_mask2former.py
+++ b/tests/models/mask2former/test_modeling_mask2former.py
@@ -276,7 +276,7 @@ class Mask2FormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestC
    def test_training(self):
        if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")
        model_class = self.all_model_classes[1]
        config, pixel_values, pixel_mask, mask_labels, class_labels = self.model_tester.prepare_config_and_inputs()

--- a/tests/models/maskformer/test_modeling_maskformer_swin.py
+++ b/tests/models/maskformer/test_modeling_maskformer_swin.py
@@ -213,11 +213,11 @@ class MaskFormerSwinModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.Te
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_backbone(*config_and_inputs)
-    @unittest.skip("Swin does not use inputs_embeds")
+    @unittest.skip(reason="Swin does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass
-    @unittest.skip("Swin does not support feedforward chunking")
+    @unittest.skip(reason="Swin does not support feedforward chunking")
    def test_feed_forward_chunking(self):
        pass

--- a/tests/models/mbart/test_modeling_mbart.py
+++ b/tests/models/mbart/test_modeling_mbart.py
@@ -496,7 +496,7 @@ class MBartCC25IntegrationTest(AbstractSeq2SeqIntegrationTest):
    ]
    tgt_text = ["Şeful ONU declară că nu există o soluţie militară în Siria", "to be padded"]
-    @unittest.skip("This test is broken, still generates english")
+    @unittest.skip(reason="This test is broken, still generates english")
    def test_cc25_generate(self):
        inputs = self.tokenizer([self.src_text[0]], return_tensors="pt").to(torch_device)
        translated_tokens = self.model.generate(
@@ -731,6 +731,6 @@ class MBartStandaloneDecoderModelTest(ModelTesterMixin, GenerationTesterMixin, u
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_decoder_model_attention_mask_past(*config_and_inputs)
+    @unittest.skip(reason="Decoder cannot retain gradients")
    def test_retain_grad_hidden_states_attentions(self):
-        # decoder cannot keep gradients
        return
--- a/tests/models/mbart/test_tokenization_mbart.py
+++ b/tests/models/mbart/test_tokenization_mbart.py
@@ -134,7 +134,7 @@ class MBartTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_save_pretrained(self):
        if not self.test_slow_tokenizer:
            # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        self.tokenizers_list[0] = (self.rust_tokenizer_class, "hf-internal-testing/tiny-random-mbart", {})
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
@@ -202,7 +202,7 @@ class MBartTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                shutil.rmtree(tmpdirname2)
-    @unittest.skip("Need to fix this after #26538")
+    @unittest.skip(reason="Need to fix this after #26538")
    def test_training_new_tokenizer(self):
        pass

--- a/tests/models/mbart50/test_tokenization_mbart50.py
+++ b/tests/models/mbart50/test_tokenization_mbart50.py
@@ -112,7 +112,7 @@ class MBart50TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
    def test_save_pretrained(self):
        if not self.test_slow_tokenizer:
            # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
        self.tokenizers_list[0] = (self.rust_tokenizer_class, "hf-internal-testing/tiny-random-mbart50", {})
        for tokenizer, pretrained_name, kwargs in self.tokenizers_list:

--- a/tests/models/megatron_bert/test_modeling_megatron_bert.py
+++ b/tests/models/megatron_bert/test_modeling_megatron_bert.py
@@ -369,7 +369,7 @@ TOLERANCE = 1e-4
 @require_tokenizers
 class MegatronBertModelIntegrationTests(unittest.TestCase):
    @slow
-    @unittest.skip("Model is not available.")
+    @unittest.skip(reason="Model is not available.")
    def test_inference_no_head(self):
        directory = "nvidia/megatron-bert-uncased-345m"
        if "MYDIR" in os.environ:

--- a/tests/models/megatron_gpt2/test_modeling_megatron_gpt2.py
+++ b/tests/models/megatron_gpt2/test_modeling_megatron_gpt2.py
@@ -31,7 +31,7 @@ if is_torch_available():
 @require_tokenizers
 class MegatronGPT2IntegrationTest(unittest.TestCase):
    @slow
-    @unittest.skip("Model is not available.")
+    @unittest.skip(reason="Model is not available.")
    def test_inference_no_head(self):
        directory = "nvidia/megatron-gpt2-345m/"
        if "MYDIR" in os.environ: