Unverified Commit 1de7dc74 authored by amyeroberts, committed by GitHub

Skip tests properly (#31308)

* Skip tests properly

* [test_all]

* Add 'reason' as kwarg for skipTest

* [test_all] Fix up

* [test_all]
parent 1f9f57ab
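
The pattern behind this change, as a minimal sketch (the test class and flag below are hypothetical, not taken from the diff): a bare return inside a test body ends the test early and the runner counts it as passed, silently hiding the lost coverage, while self.skipTest(reason=...) raises unittest.SkipTest so the run reports an explicit skip together with its reason.

    import unittest

    class ExampleModelTest(unittest.TestCase):
        test_torchscript = False  # hypothetical config flag, standing in for the tester mixins below

        def test_torchscript_old_style(self):
            # Anti-pattern removed by this commit: the early return makes the
            # test count as PASSED even though nothing was actually checked.
            if not self.test_torchscript:
                return

        def test_torchscript_new_style(self):
            # Pattern applied throughout: skipTest() raises unittest.SkipTest,
            # so the runner reports SKIPPED together with the reason string.
            if not self.test_torchscript:
                self.skipTest(reason="test_torchscript is set to False")

        @unittest.skip(reason="example model has no inputs_embeds")
        def test_inputs_embeds(self):
            # Tests that can never run are skipped declaratively at the method level.
            pass

Because unittest defines skipTest(self, reason) and skip(reason) with a parameter literally named reason, passing it as a keyword argument (the "Add 'reason' as kwarg for skipTest" step above) is valid.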
@@ -262,9 +262,11 @@ class GroupViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
             ],
         )
 
+    @unittest.skip
     def test_training(self):
         pass
 
+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass
@@ -458,9 +460,11 @@ class GroupViTTextModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
+    @unittest.skip
     def test_training(self):
         pass
 
+    @unittest.skip
     def test_training_gradient_checkpointing(self):
         pass
@@ -618,7 +622,7 @@ class GroupViTModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")
 
         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -95,7 +95,7 @@ class HerbertTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -350,22 +350,21 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.check_labels_out_of_vocab(*config_and_inputs)
 
-    # Hubert has no inputs_embeds
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_inputs_embeds(self):
         pass
 
-    # `input_ids` is renamed to `input_values`
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_forward_signature(self):
         pass
 
-    # Hubert cannot resize token embeddings
-    # since it has no tokens embeddings
+    @unittest.skip(reason="Hubert has no tokens embeddings")
    def test_resize_tokens_embeddings(self):
         pass
 
-    # Hubert has no inputs_embeds
-    # and thus the `get_input_embeddings` fn
-    # is not implemented
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_model_get_set_embeddings(self):
         pass
@@ -438,10 +437,10 @@ class HubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
     # Hubert cannot be TorchScripted because of torch.nn.utils.weight_norm
     def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False):
         # TODO: fix it
-        self.skipTest("torch 2.1 breaks torch fx tests for wav2vec2/hubert.")
+        self.skipTest(reason="torch 2.1 breaks torch fx tests for wav2vec2/hubert.")
 
         if not is_torch_fx_available() or not self.fx_compatible:
-            return
+            self.skipTest(reason="torch fx is not available or not compatible with this model")
 
         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.return_dict = False
@@ -615,22 +614,19 @@ class HubertRobustModelTest(ModelTesterMixin, unittest.TestCase):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.check_labels_out_of_vocab(*config_and_inputs)
 
-    # Hubert has no inputs_embeds
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_inputs_embeds(self):
         pass
 
-    # `input_ids` is renamed to `input_values`
+    @unittest.skip(reason="Hubert has input_values instead of input_ids")
     def test_forward_signature(self):
         pass
 
-    # Hubert cannot resize token embeddings
-    # since it has no tokens embeddings
+    @unittest.skip(reason="Hubert has no tokens embeddings")
     def test_resize_tokens_embeddings(self):
         pass
 
-    # Hubert has no inputs_embeds
-    # and thus the `get_input_embeddings` fn
-    # is not implemented
+    @unittest.skip(reason="Hubert has no inputs_embeds")
     def test_model_get_set_embeddings(self):
         pass
@@ -379,7 +379,7 @@ class IBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
         with torch.no_grad():
             model(**inputs)[0]
 
-    @unittest.skip("ibert overrides scaling to None if inputs_embeds")
+    @unittest.skip(reason="ibert overrides scaling to None if inputs_embeds")
     def test_inputs_embeds_matches_input_ids(self):
         pass
@@ -191,18 +191,18 @@ class IdeficsImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         torch.testing.assert_close(pixel_values_transform_implied, pixel_values_transform_supplied, rtol=0.0, atol=0.0)
 
-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_numpy(self):
         pass
 
-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_numpy_4_channels(self):
         pass
 
-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_pil(self):
         pass
 
-    @unittest.skip("not supported")
+    @unittest.skip(reason="not supported")
     def test_call_pytorch(self):
         pass
@@ -316,7 +316,7 @@ class IdeficsModelTester:
     @slow
     @parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
     def test_eager_matches_sdpa_inference(self, torch_dtype: str):
-        self.skipTest("Idefics has a hard requirement on SDPA, skipping this test")
+        self.skipTest(reason="Idefics has a hard requirement on SDPA, skipping this test")
 
     @unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required")
@@ -422,13 +422,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     def test_training(self):
         if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")
 
         for model_class in self.all_model_classes:
             # IdeficsModel does not support training, users should use
             # IdeficsForVisionText2Text for this purpose
             if model_class == IdeficsModel:
-                return
+                self.skipTest(reason="IdeficsModel does not support training")
 
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             config.return_dict = True
@@ -442,13 +442,13 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     def test_training_gradient_checkpointing(self):
         if not self.model_tester.is_training:
-            return
+            self.skipTest(reason="model_tester.is_training is set to False")
 
         for model_class in self.all_model_classes:
             # IdeficsModel does not support training, users should use
             # IdeficsForVisionText2Text for this purpose
             if model_class == IdeficsModel:
-                return
+                self.skipTest(reason="IdeficsModel does not support training")
 
             config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
             config.use_cache = False
@@ -575,7 +575,7 @@ class IdeficsModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     @slow
     @parameterized.expand([("float16",), ("bfloat16",), ("float32",)])
     def test_eager_matches_sdpa_inference(self, torch_dtype: str):
-        self.skipTest("Idefics has a hard requirement on SDPA, skipping this test")
+        self.skipTest(reason="Idefics has a hard requirement on SDPA, skipping this test")
 
     @unittest.skipIf(not is_torch_greater_or_equal_than_2_0, reason="pytorch 2.0 or higher is required")
@@ -590,11 +590,11 @@ class IdeficsForVisionText2TextTest(IdeficsModelTest, unittest.TestCase):
         )
         self.config_tester = ConfigTester(self, config_class=IdeficsConfig, hidden_size=37)
 
-    @unittest.skip("We only test the model that takes in multiple images")
+    @unittest.skip(reason="We only test the model that takes in multiple images")
     def test_model(self):
         pass
 
-    @unittest.skip("We only test the model that takes in multiple images")
+    @unittest.skip(reason="We only test the model that takes in multiple images")
     def test_for_token_classification(self):
         pass
@@ -176,19 +176,19 @@ class Idefics2ModelTest(ModelTesterMixin, unittest.TestCase):
         self.model_tester = Idefics2VisionText2TextModelTester(self)
         self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)
 
-    @unittest.skip("input_embeds cannot be passed in without input_ids")
+    @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
     def test_inputs_embeds():
         pass
 
-    @unittest.skip("input_embeds cannot be passed in without input_ids")
+    @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
     def test_inputs_embeds_matches_input_ids(self):
         pass
 
-    @unittest.skip("Model does not support padding right")
+    @unittest.skip(reason="Model does not support padding right")
     def test_flash_attn_2_generate_padding_right(self):
         pass
 
-    @unittest.skip("Model does not support padding right")
+    @unittest.skip(reason="Model does not support padding right")
     def test_flash_attn_2_inference_padding_right(self):
         pass
@@ -336,15 +336,15 @@ class Idefics2ForConditionalGenerationModelTest(GenerationTesterMixin, ModelTest
         self.model_tester = Idefics2VisionText2TextModelTester(self)
         self.config_tester = ConfigTester(self, config_class=Idefics2Config, has_text_modality=False)
 
-    @unittest.skip("input_embeds cannot be passed in without input_ids")
+    @unittest.skip(reason="input_embeds cannot be passed in without input_ids")
     def test_inputs_embeds():
         pass
 
-    @unittest.skip("Model does not support padding right")
+    @unittest.skip(reason="Model does not support padding right")
     def test_flash_attn_2_generate_padding_right(self):
         pass
 
-    @unittest.skip("Model does not support padding right")
+    @unittest.skip(reason="Model does not support padding right")
     def test_flash_attn_2_inference_padding_right(self):
         pass
@@ -176,7 +176,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
         else:
             self.assertEqual(image_processor_first[key], value)
 
-    @unittest.skip("ImageGPT requires clusters at initialization")
+    @unittest.skip(reason="ImageGPT requires clusters at initialization")
     def test_init_without_params(self):
         pass
@@ -220,7 +220,7 @@ class ImageGPTImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase):
             tuple(encoded_images.shape), (self.image_processor_tester.batch_size, *expected_output_image_shape)
         )
 
-    @unittest.skip("ImageGPT assumes clusters for 3 channels")
+    @unittest.skip(reason="ImageGPT assumes clusters for 3 channels")
     def test_call_numpy_4_channels(self):
         pass
@@ -357,7 +357,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
             inputs_dict,
         ) = self.model_tester.prepare_config_and_inputs_for_common()
 
         if not self.test_resize_embeddings:
-            return
+            self.skipTest(reason="test_resize_embeddings is set to False")
 
         for model_class in self.all_model_classes:
             config = copy.deepcopy(original_config)
@@ -404,13 +404,13 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
             inputs_dict,
         ) = self.model_tester.prepare_config_and_inputs_for_common()
 
         if not self.test_resize_embeddings:
-            return
+            self.skipTest(reason="test_resize_embeddings is set to False")
 
         original_config.tie_word_embeddings = False
 
         # if model cannot untied embeddings -> leave test
         if original_config.tie_word_embeddings:
-            return
+            self.skipTest(reason="tie_word_embeddings is set to False")
 
         for model_class in self.all_model_classes:
             config = copy.deepcopy(original_config)
@@ -493,7 +493,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")
 
         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -573,7 +573,7 @@ class ImageGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
         self.assertTrue(models_equal)
 
-    @unittest.skip("The model doesn't support left padding")  # and it's not used enough to be worth fixing :)
+    @unittest.skip(reason="The model doesn't support left padding")  # and it's not used enough to be worth fixing :)
     def test_left_padding_compatibility(self):
         pass
@@ -278,17 +278,19 @@ class InformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
                 check_hidden_states_output(inputs_dict, config, model_class)
 
-    # Ignore since we have no tokens embeddings
+    @unittest.skip(reason="Informer does not have tokens embeddings")
     def test_resize_tokens_embeddings(self):
         pass
 
+    @unittest.skip
     def test_model_outputs_equivalence(self):
         pass
 
+    @unittest.skip
     def test_determinism(self):
         pass
 
-    @unittest.skip("randomly selects U keys while calculating attentions")
+    @unittest.skip(reason="randomly selects U keys while calculating attentions")
     def test_batching_equivalence(self):
         pass
@@ -390,7 +390,7 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
         Overriding the test_mismatched_shapes_have_properly_initialized_weights test because A_log and D params of the
         Mamba block are initialized differently and we tested that in test_initialization
         """
-        self.skipTest("Cumbersome and redundant for Jamba")
+        self.skipTest(reason="Cumbersome and redundant for Jamba")
 
     def test_attention_outputs(self):
         r"""
@@ -638,9 +638,9 @@ class JambaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
         Overriding the test_flash_attn_2_inference_padding_right test as the Jamba model, like Mixtral, doesn't support
         right padding + use cache with FA2
         """
-        self.skipTest("Jamba flash attention does not support right padding")
+        self.skipTest(reason="Jamba flash attention does not support right padding")
 
-    @unittest.skip("Jamba has its own special cache type")
+    @unittest.skip(reason="Jamba has its own special cache type")
     @parameterized.expand([(1, False), (1, True), (4, False)])
     def test_new_cache_format(self, num_beams, do_sample):
         pass
@@ -378,11 +378,11 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
         result = model(input_ids, attention_mask=attention_mask, labels=sequence_labels)
         self.assertEqual(result.logits.shape, (self.model_tester.batch_size, self.model_tester.num_labels))
 
-    @unittest.skip("JetMoe buffers include complex numbers, which breaks this test")
+    @unittest.skip(reason="JetMoe buffers include complex numbers, which breaks this test")
     def test_save_load_fast_init_from_base(self):
         pass
 
-    @unittest.skip("JetMoe uses MoA on all models so the KV cache is a non standard format")
+    @unittest.skip(reason="JetMoe uses MoA on all models so the KV cache is a non standard format")
     def test_past_key_values_format(self):
         pass
@@ -470,7 +470,7 @@ class JetMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
     @pytest.mark.flash_attn_test
     @slow
     def test_flash_attn_2_inference_equivalence_right_padding(self):
-        self.skipTest("JetMoe flash attention does not support right padding")
+        self.skipTest(reason="JetMoe flash attention does not support right padding")
 
 @require_torch
@@ -375,7 +375,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     # overwrite from common in order to use `config.text_config.vocab_size` instead of `config.vocab_size`
     def test_tie_model_weights(self):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")
 
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -429,7 +429,7 @@ class Kosmos2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
     def _create_and_check_torchscript(self, config, inputs_dict):
         if not self.test_torchscript:
-            return
+            self.skipTest(reason="test_torchscript is set to False")
 
         configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
         configs_no_init.torchscript = True
@@ -69,6 +69,7 @@ class LayoutLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         self.assertListEqual(tokens, ["un", "##want", "##ed", ",", "runn", "##ing"])
         self.assertListEqual(tokenizer.convert_tokens_to_ids(tokens), [7, 4, 5, 10, 8, 9])
 
+    @unittest.skip
     def test_special_tokens_as_you_expect(self):
         """If you are training a seq2seq model that expects a decoder_prefix token make sure it is prepended to decoder_input_ids"""
         pass
@@ -96,7 +96,7 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
         image_processor = self.image_processing_class.from_dict(self.image_processor_dict, size=42)
         self.assertEqual(image_processor.size, {"height": 42, "width": 42})
 
-    @unittest.skip("Tesseract version is not correct in ci. @Arthur FIXME")
+    @unittest.skip(reason="Tesseract version is not correct in ci. @Arthur FIXME")
     def test_layoutlmv2_integration_test(self):
         # with apply_OCR = True
         image_processing = LayoutLMv2ImageProcessor()
@@ -414,7 +414,7 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
                 check_hidden_states_output(inputs_dict, config, model_class)
 
-    @unittest.skip("We cannot configure detectron2 to output a smaller backbone")
+    @unittest.skip(reason="We cannot configure detectron2 to output a smaller backbone")
     def test_model_is_small(self):
         pass
@@ -195,7 +195,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             tokenizer.tokenize(" \tHeLLo!how \n Are yoU? [UNK]"), ["HeLLo", "!", "how", "Are", "yoU", "?", "[UNK]"]
         )
 
-    @unittest.skip("Chat template tests don't play well with table/layout models.")
+    @unittest.skip(reason="Chat template tests don't play well with table/layout models.")
     def test_chat_template_batched(self):
         pass
@@ -385,11 +385,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
                 self.assertIn(decoded, [output, output.lower()])
 
-    @unittest.skip("Not implemented")
+    @unittest.skip(reason="Not implemented")
     def test_right_and_left_truncation(self):
         pass
 
-    @unittest.skip("Not implemented")
+    @unittest.skip(reason="Not implemented")
     def test_split_special_tokens(self):
         pass
@@ -814,7 +814,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding_warning_message_fast_tokenizer(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         words, boxes = self.get_words_and_boxes_batch()
@@ -835,7 +835,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         )
 
         if not self.test_slow_tokenizer:
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         tokenizer_slow = self.get_tokenizer()
@@ -942,7 +942,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 encoded_sequences_batch_padded_2[key],
             )
 
-    @unittest.skip("batch_encode_plus does not handle overflowing tokens.")
+    @unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
     def test_batch_encode_plus_overflowing_tokens(self):
         pass
@@ -1003,7 +1003,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         for tokenizer in tokenizers:
             with self.subTest(f"{tokenizer.__class__.__name__}"):
                 if tokenizer.pad_token is None:
-                    self.skipTest("No padding token.")
+                    self.skipTest(reason="No padding token.")
                 else:
                     words, boxes = self.get_words_and_boxes()
@@ -1046,7 +1046,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_build_inputs_with_special_tokens(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1290,13 +1290,13 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         for tokenizer in tokenizers:
             with self.subTest(f"{tokenizer.__class__.__name__}"):
                 if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
-                    return
+                    self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
 
                 config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
 
                 config = config_class()
 
                 if config.is_encoder_decoder or config.pad_token_id is None:
-                    return
+                    self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
 
                 model = model_class(config)
@@ -1327,11 +1327,11 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -1349,7 +1349,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_tokenization_python_rust_equals(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1403,7 +1403,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_embeded_special_tokens(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1593,7 +1593,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_training_new_tokenizer(self):
         # This feature only exists for fast tokenizers
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_rust_tokenizer()
         new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
@@ -1630,7 +1630,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_training_new_tokenizer_with_special_tokens_change(self):
         # This feature only exists for fast tokenizers
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_rust_tokenizer()
         # Test with a special tokens map
@@ -1743,7 +1743,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding_different_model_input_name(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1837,7 +1837,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             self.assertEqual(len(tokens[key].shape), 3)
             self.assertEqual(tokens[key].shape[-1], 4)
 
-    @unittest.skip("TO DO: overwrite this very extensive test.")
+    @unittest.skip(reason="TO DO: overwrite this very extensive test.")
     def test_alignement_methods(self):
         pass
@@ -1875,7 +1875,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         return words, boxes, output_ids
 
-    # @unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.")
+    # @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
     def test_maximum_encoding_length_pair_input(self):
         tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
         for tokenizer in tokenizers:
@@ -2237,7 +2237,7 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 self.assertEqual(bbox, bbox_second_sequence)
                 self.assertEqual(overflowing_bbox, overflowing_token_bbox_second_sequence_slow)
 
-    # @unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.")
+    # @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
     def test_maximum_encoding_length_single_input(self):
         tokenizers = self.get_tokenizers(do_lower_case=False, model_max_length=100)
         for tokenizer in tokenizers:
@@ -2359,15 +2359,15 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 self.assertEqual(bbox, sequence["bbox"][:-2])
                 self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
 
-    @unittest.skip("LayoutLMv2 tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="LayoutLMv2 tokenizer requires boxes besides sequences.")
     def test_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("LayoutLMv2 tokenizer always expects pretokenized inputs.")
+    @unittest.skip(reason="LayoutLMv2 tokenizer always expects pretokenized inputs.")
     def test_compare_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("LayoutLMv2 fast tokenizer does not support prepare_for_model")
+    @unittest.skip(reason="LayoutLMv2 fast tokenizer does not support prepare_for_model")
     def test_compare_prepare_for_model(self):
         pass
@@ -2476,10 +2476,10 @@ class LayoutLMv2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             self.assertDictEqual(dict(encoding_p), expected_results)
             self.assertDictEqual(dict(encoding_r), expected_results)
 
-    @unittest.skip("Doesn't support another framework than PyTorch")
+    @unittest.skip(reason="Doesn't support another framework than PyTorch")
     def test_np_encode_plus_sent_to_model(self):
         pass
 
-    @unittest.skip("Chat is not supported")
+    @unittest.skip(reason="Chat is not supported")
     def test_chat_template(self):
         pass
@@ -140,7 +140,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         output_text = "lower newer"
         return input_text, output_text
 
-    @unittest.skip("Chat template tests don't play well with table/layout models.")
+    @unittest.skip(reason="Chat template tests don't play well with table/layout models.")
     def test_chat_template_batched(self):
         pass
@@ -265,11 +265,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 decoded = tokenizer.decode(encoded, spaces_between_special_tokens=self.space_between_special_tokens)
                 self.assertIn(decoded, [output, output.lower()])
 
-    @unittest.skip("Not implemented")
+    @unittest.skip(reason="Not implemented")
     def test_right_and_left_truncation(self):
         pass
 
-    @unittest.skip("Not implemented")
+    @unittest.skip(reason="Not implemented")
     def test_split_special_tokens(self):
         pass
@@ -694,7 +694,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding_warning_message_fast_tokenizer(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         words, boxes = self.get_words_and_boxes_batch()
@@ -715,7 +715,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         )
 
         if not self.test_slow_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer_slow = self.get_tokenizer()
@@ -822,7 +822,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 encoded_sequences_batch_padded_2[key],
             )
 
-    @unittest.skip("batch_encode_plus does not handle overflowing tokens.")
+    @unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
     def test_batch_encode_plus_overflowing_tokens(self):
         pass
@@ -883,7 +883,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         for tokenizer in tokenizers:
             with self.subTest(f"{tokenizer.__class__.__name__}"):
                 if tokenizer.pad_token is None:
-                    self.skipTest("No padding token.")
+                    self.skipTest(reason="No padding token.")
                 else:
                     words, boxes = self.get_words_and_boxes()
@@ -926,7 +926,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_build_inputs_with_special_tokens(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1168,13 +1168,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         for tokenizer in tokenizers:
             with self.subTest(f"{tokenizer.__class__.__name__}"):
                 if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
-                    return
+                    self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
 
                 config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
 
                 config = config_class()
 
                 if config.is_encoder_decoder or config.pad_token_id is None:
-                    return
+                    self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
 
                 model = model_class(config)
@@ -1205,11 +1205,11 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -1227,7 +1227,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_tokenization_python_rust_equals(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1281,7 +1281,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_embeded_special_tokens(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1471,7 +1471,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_training_new_tokenizer(self):
         # This feature only exists for fast tokenizers
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_rust_tokenizer()
         new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
@@ -1508,7 +1508,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_training_new_tokenizer_with_special_tokens_change(self):
         # This feature only exists for fast tokenizers
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_rust_tokenizer()
         # Test with a special tokens map
@@ -1621,7 +1621,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding_different_model_input_name(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1720,7 +1720,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             self.assertEqual(len(tokens[key].shape), 3)
             self.assertEqual(tokens[key].shape[-1], 4)
 
-    @unittest.skip("TO DO: overwrite this very extensive test.")
+    @unittest.skip(reason="TO DO: overwrite this very extensive test.")
     def test_alignement_methods(self):
         pass
@@ -2272,15 +2272,15 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         # self.assertEqual(bbox, sequence["bbox"][:-2])
         # self.assertEqual(overflowing_bbox, sequence["bbox"][-(2 + stride) :])
 
-    @unittest.skip("LayoutLMv3 tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="LayoutLMv3 tokenizer requires boxes besides sequences.")
     def test_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("LayoutLMv3 tokenizer always expects pretokenized inputs.")
+    @unittest.skip(reason="LayoutLMv3 tokenizer always expects pretokenized inputs.")
     def test_compare_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("LayoutLMv3 fast tokenizer does not support prepare_for_model")
+    @unittest.skip(reason="LayoutLMv3 fast tokenizer does not support prepare_for_model")
     def test_compare_prepare_for_model(self):
         pass
@@ -2393,7 +2393,7 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             self.assertDictEqual(dict(encoding_p), expected_results)
             self.assertDictEqual(dict(encoding_r), expected_results)
 
-    @unittest.skip("Doesn't support another framework than PyTorch")
+    @unittest.skip(reason="Doesn't support another framework than PyTorch")
     def test_np_encode_plus_sent_to_model(self):
         pass
@@ -2408,13 +2408,13 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         for tokenizer in tokenizers:
             with self.subTest(f"{tokenizer.__class__.__name__}"):
                 if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
-                    return
+                    self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
 
                 config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
 
                 config = config_class()
 
                 if config.is_encoder_decoder or config.pad_token_id is None:
-                    return
+                    self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
 
                 model = model_class(config)
@@ -2433,6 +2433,6 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 model(encoded_sequence)
                 model(batch_encoded_sequence)
 
-    @unittest.skip("Chat is not supported")
+    @unittest.skip(reason="Chat is not supported")
     def test_chat_template(self):
         pass
@@ -107,7 +107,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         output_text = "unwanted, running"
         return input_text, output_text
 
-    @unittest.skip("Chat template tests don't play well with table/layout models.")
+    @unittest.skip(reason="Chat template tests don't play well with table/layout models.")
     def test_chat_template_batched(self):
         pass
@@ -115,7 +115,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     # this tokenizer
     def test_save_sentencepiece_tokenizer(self) -> None:
         if not self.test_sentencepiece or not self.test_slow_tokenizer:
-            return
+            self.skipTest(reason="test_sentencepiece or test_slow_tokenizer is set to False")
         # We want to verify that we will be able to save the tokenizer even if the original files that were used to
         # build the tokenizer have been deleted in the meantime.
         words, boxes = self.get_words_and_boxes()
@@ -745,7 +745,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding_warning_message_fast_tokenizer(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         words, boxes = self.get_words_and_boxes_batch()
@@ -766,7 +766,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         )
 
         if not self.test_slow_tokenizer:
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         tokenizer_slow = self.get_tokenizer()
@@ -873,7 +873,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
                 encoded_sequences_batch_padded_2[key],
             )
 
-    @unittest.skip("batch_encode_plus does not handle overflowing tokens.")
+    @unittest.skip(reason="batch_encode_plus does not handle overflowing tokens.")
     def test_batch_encode_plus_overflowing_tokens(self):
         pass
@@ -934,7 +934,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         for tokenizer in tokenizers:
             with self.subTest(f"{tokenizer.__class__.__name__}"):
                 if tokenizer.pad_token is None:
-                    self.skipTest("No padding token.")
+                    self.skipTest(reason="No padding token.")
                 else:
                     words, boxes = self.get_words_and_boxes()
@@ -977,7 +977,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_build_inputs_with_special_tokens(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1066,7 +1066,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         shutil.rmtree(tmpdirname)
 
-    @unittest.skip("Not implemented")
+    @unittest.skip(reason="Not implemented")
     def test_right_and_left_truncation(self):
         pass
@@ -1224,13 +1224,13 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
         for tokenizer in tokenizers:
             with self.subTest(f"{tokenizer.__class__.__name__}"):
                 if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING:
-                    return
+                    self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING")
 
                 config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__]
 
                 config = config_class()
 
                 if config.is_encoder_decoder or config.pad_token_id is None:
-                    return
+                    self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.")
 
                 model = model_class(config)
@@ -1256,11 +1256,11 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_rust_and_python_full_tokenizers(self):
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         tokenizer = self.get_tokenizer()
         rust_tokenizer = self.get_rust_tokenizer()
@@ -1278,7 +1278,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_tokenization_python_rust_equals(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1332,7 +1332,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_embeded_special_tokens(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1522,7 +1522,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_training_new_tokenizer(self):
         # This feature only exists for fast tokenizers
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_rust_tokenizer()
         new_tokenizer = tokenizer.train_new_from_iterator(SMALL_TRAINING_CORPUS, 100)
@@ -1559,7 +1559,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_training_new_tokenizer_with_special_tokens_change(self):
         # This feature only exists for fast tokenizers
         if not self.test_rust_tokenizer:
-            return
+            self.skipTest(reason="test_rust_tokenizer is set to False")
 
         tokenizer = self.get_rust_tokenizer()
         # Test with a special tokens map
@@ -1672,7 +1672,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_padding_different_model_input_name(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
             with self.subTest(f"{tokenizer.__class__.__name__} ({pretrained_name})"):
@@ -1770,7 +1770,7 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
     def test_save_pretrained(self):
         if not self.test_slow_tokenizer:
             # as we don't have a slow version, we can't compare the outputs between slow and fast versions
-            return
+            self.skipTest(reason="test_slow_tokenizer is set to False")
 
         self.tokenizers_list[0] = (self.rust_tokenizer_class, "hf-internal-testing/tiny-random-layoutxlm", {})
         for tokenizer, pretrained_name, kwargs in self.tokenizers_list:
@@ -1838,27 +1838,27 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             shutil.rmtree(tmpdirname2)
 
-    @unittest.skip("TO DO: overwrite this very extensive test.")
+    @unittest.skip(reason="TO DO: overwrite this very extensive test.")
     def test_alignement_methods(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
     def test_maximum_encoding_length_pair_input(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
     def test_maximum_encoding_length_single_input(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer requires boxes besides sequences.")
+    @unittest.skip(reason="layoutxlm tokenizer requires boxes besides sequences.")
     def test_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("layoutxlm tokenizer always expects pretokenized inputs.")
+    @unittest.skip(reason="layoutxlm tokenizer always expects pretokenized inputs.")
     def test_compare_pretokenized_inputs(self):
         pass
 
-    @unittest.skip("layoutxlm fast tokenizer does not support prepare_for_model")
+    @unittest.skip(reason="layoutxlm fast tokenizer does not support prepare_for_model")
     def test_compare_prepare_for_model(self):
         pass
@@ -1962,18 +1962,18 @@ class LayoutXLMTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             self.assertDictEqual(dict(encoding_p), expected_results)
             self.assertDictEqual(dict(encoding_r), expected_results)
 
-    @unittest.skip("Doesn't support another framework than PyTorch")
+    @unittest.skip(reason="Doesn't support another framework than PyTorch")
     def test_np_encode_plus_sent_to_model(self):
         pass
 
-    @unittest.skip("Doesn't use SentencePiece")
+    @unittest.skip(reason="Doesn't use SentencePiece")
     def test_sentencepiece_tokenize_and_convert_tokens_to_string(self):
         pass
 
-    @unittest.skip("Doesn't use SentencePiece")
+    @unittest.skip(reason="Doesn't use SentencePiece")
     def test_sentencepiece_tokenize_and_decode(self):
         pass
 
-    @unittest.skip("Chat is not supported")
+    @unittest.skip(reason="Chat is not supported")
     def test_chat_template(self):
         pass
@@ -378,8 +378,8 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
             model.generate(input_ids, attention_mask=attention_mask)
             model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
 
+    @unittest.skip(reason="Longformer cannot keep gradients in attentions or hidden states")
     def test_retain_grad_hidden_states_attentions(self):
-        # longformer cannot keep gradients in attentions or hidden states
         return
 
     def test_attention_outputs(self):
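
With skips reported properly, these tests now surface in the run summary instead of being counted as passing; for example, pytest's -rs report flag lists each skipped test together with its reason string, and python -m unittest -v marks them as skipped rather than ok.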