Unverified Commit 6c2ad00c authored by Yih-Dar, committed by GitHub

Move `is_pipeline_test_to_skip` to specific model test classes (#21999)



* Move `is_pipeline_test_to_skip` to specific model test classes

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent 2beabd24
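In short, this change replaces the central `is_test_to_skip` helper (removed at the bottom of this diff) with an `is_pipeline_test_to_skip` hook: `PipelineTesterMixin` ships a default that skips nothing, and each model test class overrides it with its own known failures. A minimal, self-contained sketch of the pattern, not the actual transformers code (the nested loop is condensed into a made-up `combinations_to_run` helper, but the hook name and signature match the diff below):

    class PipelineTesterMixin:
        def is_pipeline_test_to_skip(
            self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        ):
            # Default hook: skip nothing. Model-specific test classes override this with their known failures.
            return False

        def combinations_to_run(
            self, pipeline_test_class_name, config_class, model_architecture, tokenizer_names, processor_names
        ):
            # Simplified stand-in for the nested loop in the real mixin: every (tokenizer, processor)
            # pair is checked against the hook before a pipeline test is run for it.
            return [
                (tokenizer_name, processor_name)
                for tokenizer_name in tokenizer_names
                for processor_name in processor_names
                if not self.is_pipeline_test_to_skip(
                    pipeline_test_class_name, config_class, model_architecture, tokenizer_name, processor_name
                )
            ]


    class PLBartModelTest(PipelineTesterMixin):
        def is_pipeline_test_to_skip(
            self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        ):
            # Per-model override: the known failure is documented next to the model's own tests.
            if pipeline_test_casse_name == "TranslationPipelineTests":
                return True
            return False


    if __name__ == "__main__":
        test = PLBartModelTest()
        # The translation pipeline test is skipped for PLBart; everything else still runs.
        print(test.combinations_to_run("TranslationPipelineTests", None, None, ["PLBartTokenizerFast"], [None]))
        print(test.combinations_to_run("FeatureExtractionPipelineTests", None, None, ["PLBartTokenizerFast"], [None]))

The gain is locality: each documented failure now lives next to the model test class it concerns rather than in one central list.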
@@ -237,6 +237,17 @@ class PLBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
    test_pruning = False
    test_missing_keys = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name == "TranslationPipelineTests":
            # Get `ValueError: Translation requires a `src_lang` and a `tgt_lang` for this model`.
            # `PLBartConfig` was never used in pipeline tests: cannot create a simple tokenizer.
            return True

        return False

    def setUp(self):
        self.model_tester = PLBartModelTester(self)
        self.config_tester = ConfigTester(self, config_class=PLBartConfig)
@@ -904,6 +904,18 @@ class ProphetNetModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTeste
    test_resize_embeddings = False
    is_encoder_decoder = True

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name == "TextGenerationPipelineTests":
            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
            # `ProphetNetConfig` was never used in pipeline tests: cannot create a simple
            # tokenizer.
            return True

        return False

    def setUp(self):
        self.model_tester = ProphetNetModelTester(self)
        self.config_tester = ConfigTester(self, config_class=ProphetNetConfig)
@@ -709,6 +709,22 @@ class ReformerLSHAttnModelTest(
    test_headmasking = False
    test_torchscript = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if (
            pipeline_test_casse_name == "QAPipelineTests"
            and tokenizer_name is not None
            and not tokenizer_name.endswith("Fast")
        ):
            # `QAPipelineTests` fails for a few models when the slower tokenizers are used.
            # (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
            # TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
            return True

        return False

    def setUp(self):
        self.model_tester = ReformerModelTester(
            self,
@@ -586,6 +586,24 @@ class RoCBertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
        else {}
    )

    # TODO: Fix the failed tests when this model gets more usage
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name in [
            "FillMaskPipelineTests",
            "FeatureExtractionPipelineTests",
            "TextClassificationPipelineTests",
            "TokenClassificationPipelineTests",
        ]:
            # Get error: IndexError: index out of range in self.
            # `word_shape_file` and `word_pronunciation_file` should be shrunk during tiny model creation,
            # otherwise `IndexError` could occur in some embedding layers. Skip for now until this model has
            # more usage.
            return True

        return False

    # special case for ForPreTraining model
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
@@ -271,6 +271,15 @@ class TFRoFormerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
    test_head_masking = False
    test_onnx = False

    # TODO: add `prepare_inputs_for_generation` for `TFRoFormerForCausalLM`
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name == "TextGenerationPipelineTests":
            return True

        return False

    def setUp(self):
        self.model_tester = TFRoFormerModelTester(self)
        self.config_tester = ConfigTester(self, config_class=RoFormerConfig, hidden_size=37)
@@ -486,6 +486,12 @@ class TapasModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
            )
        return inputs_dict

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        return True

    def setUp(self):
        self.model_tester = TapasModelTester(self)
        self.config_tester = ConfigTester(self, config_class=TapasConfig, dim=37)
@@ -443,6 +443,12 @@ class TFTapasModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCas
    test_head_masking = False
    test_onnx = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        return True

    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
        inputs_dict = copy.deepcopy(inputs_dict)
@@ -177,6 +177,18 @@ class TFTransfoXLModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Tes
    test_onnx = False
    test_mismatched_shapes = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name == "TextGenerationPipelineTests":
            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
            # `TransfoXLConfig` was never used in pipeline tests: cannot create a simple
            # tokenizer.
            return True

        return False

    def setUp(self):
        self.model_tester = TFTransfoXLModelTester(self)
        self.config_tester = ConfigTester(self, config_class=TransfoXLConfig, d_embed=37)
@@ -271,6 +271,18 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
    test_resize_embeddings = True
    test_mismatched_shapes = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name == "TextGenerationPipelineTests":
            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
            # `TransfoXLConfig` was never used in pipeline tests: cannot create a simple
            # tokenizer.
            return True

        return False

    def check_cutoffs_and_n_token(
        self, copied_cutoffs, layer, model_embed, model, model_class, resized_value, vocab_size
    ):
@@ -309,6 +309,26 @@ class TFXLMModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
    test_head_masking = False
    test_onnx = False

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name == "FillMaskPipelineTests":
            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
            # `XLMConfig` was never used in pipeline tests: cannot create a simple tokenizer
            return True
        elif (
            pipeline_test_casse_name == "QAPipelineTests"
            and tokenizer_name is not None
            and not tokenizer_name.endswith("Fast")
        ):
            # `QAPipelineTests` fails for a few models when the slower tokenizers are used.
            # (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
            # TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
            return True

        return False

    def setUp(self):
        self.model_tester = TFXLMModelTester(self)
        self.config_tester = ConfigTester(self, config_class=XLMConfig, emb_dim=37)
@@ -391,6 +391,26 @@ class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
        else {}
    )

    # TODO: Fix the failed tests
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        if pipeline_test_casse_name == "FillMaskPipelineTests":
            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
            # `XLMConfig` was never used in pipeline tests: cannot create a simple tokenizer
            return True
        elif (
            pipeline_test_casse_name == "QAPipelineTests"
            and tokenizer_name is not None
            and not tokenizer_name.endswith("Fast")
        ):
            # `QAPipelineTests` fails for a few models when the slower tokenizers are used.
            # (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
            # TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
            return True

        return False

    # XLM has 2 QA models -> need to manually set the correct labels for one of them here
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
@@ -172,7 +172,7 @@ class PipelineTesterMixin:
        for tokenizer_name in tokenizer_names:
            for processor_name in processor_names:
-               if is_test_to_skip(
+               if self.is_pipeline_test_to_skip(
                    pipeline_test_class_name,
                    model_architecture.config_class,
                    model_architecture,
@@ -404,6 +404,11 @@ class PipelineTesterMixin:
    def test_pipeline_zero_shot_object_detection(self):
        self.run_task_tests(task="zero-shot-object-detection")

    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        return False


def validate_test_components(test_case, task, model, tokenizer, processor):
    # TODO: Move this to tiny model creation script
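A note on the new default above: because the skip decision is now an ordinary overridable method, a model test class that inherits from another test class can also defer to its parent for any case it does not handle itself. A small illustrative sketch of that pattern (both classes are hypothetical and not part of this diff):

    class BaseModelTest:
        def is_pipeline_test_to_skip(
            self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        ):
            # Shared rule: QA pipeline tests are only run with fast tokenizers.
            if (
                pipeline_test_casse_name == "QAPipelineTests"
                and tokenizer_name is not None
                and not tokenizer_name.endswith("Fast")
            ):
                return True
            return False


    class DerivedModelTest(BaseModelTest):
        def is_pipeline_test_to_skip(
            self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        ):
            # Add one model-specific skip ...
            if pipeline_test_casse_name == "TranslationPipelineTests":
                return True
            # ... and fall back to whatever the parent class already skips.
            return super().is_pipeline_test_to_skip(
                pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
            )


    # The slow-tokenizer QA skip is inherited from the parent rule:
    print(DerivedModelTest().is_pipeline_test_to_skip("QAPipelineTests", None, None, "XLMTokenizer", None))  # True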
@@ -431,109 +436,3 @@ def validate_test_components(test_case, task, model, tokenizer, processor):
            f"(`{tokenizer.__class__.__name__}`) has {len(tokenizer)} tokens which is greater than "
            f"`config_vocab_size` ({config_vocab_size}). Something is wrong."
        )


def is_test_to_skip(test_casse_name, config_class, model_architecture, tokenizer_name, processor_name):
    """Some tests are just not working"""
    to_skip = False

    if config_class.__name__ == "RoCBertConfig" and test_casse_name in [
        "FillMaskPipelineTests",
        "FeatureExtractionPipelineTests",
        "TextClassificationPipelineTests",
        "TokenClassificationPipelineTests",
    ]:
        # Get error: IndexError: index out of range in self.
        # `word_shape_file` and `word_pronunciation_file` should be shrunk during tiny model creation,
        # otherwise `IndexError` could occur in some embedding layers. Skip for now until this model has
        # more usage.
        to_skip = True
    elif config_class.__name__ in ["LayoutLMv3Config", "LiltConfig"]:
        # Get error: ValueError: Words must be of type `List[str]`. Previously, `LayoutLMv3` was not
        # used in pipeline tests as it could not find a checkpoint
        # TODO: check and fix if possible
        to_skip = True
    # config/model class we decide to skip
    elif config_class.__name__ in ["TapasConfig"]:
        # Get error: AssertionError: Table must be of type pd.DataFrame. Also, the tiny model has a large
        # vocab size as the fast tokenizer could not be converted. Previously, `Tapas` was not used in
        # pipeline tests for the same reason.
        # TODO: check and fix if possible
        to_skip = True

    # TODO: check and fix if possible
    if not to_skip and tokenizer_name is not None:
        if (
            test_casse_name == "QAPipelineTests"
            and not tokenizer_name.endswith("Fast")
            and config_class.__name__
            in [
                "FlaubertConfig",
                "GPTJConfig",
                "LongformerConfig",
                "MvpConfig",
                "OPTConfig",
                "ReformerConfig",
                "XLMConfig",
            ]
        ):
            # `QAPipelineTests` fails for a few models when the slower tokenizers are used.
            # (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
            # TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
            to_skip = True
        elif test_casse_name == "ZeroShotClassificationPipelineTests" and config_class.__name__ in [
            "CTRLConfig",
            "OpenAIGPTConfig",
        ]:
            # Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
            # `CTRLConfig` and `OpenAIGPTConfig` were never used in pipeline tests, either because of a missing
            # checkpoint or because a tiny config could not be created
            to_skip = True
        elif test_casse_name == "TranslationPipelineTests" and config_class.__name__ in [
            "M2M100Config",
            "PLBartConfig",
        ]:
            # Get `ValueError: Translation requires a `src_lang` and a `tgt_lang` for this model`.
            # `M2M100Config` and `PLBartConfig` were never used in pipeline tests: cannot create a simple tokenizer
            to_skip = True
        elif test_casse_name == "TextGenerationPipelineTests" and config_class.__name__ in [
            "ProphetNetConfig",
            "TransfoXLConfig",
        ]:
            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
            # `TransfoXLConfig` and `ProphetNetConfig` were never used in pipeline tests: cannot create a simple
            # tokenizer.
            to_skip = True
        elif test_casse_name == "FillMaskPipelineTests" and config_class.__name__ in [
            "FlaubertConfig",
            "XLMConfig",
        ]:
            # Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
            # `FlaubertConfig` and `XLMConfig` were never used in pipeline tests: cannot create a simple
            # tokenizer
            to_skip = True
        elif test_casse_name == "TextGenerationPipelineTests" and model_architecture.__name__ in [
            "TFRoFormerForCausalLM"
        ]:
            # TODO: add `prepare_inputs_for_generation` for `TFRoFormerForCausalLM`
            to_skip = True
        elif test_casse_name == "QAPipelineTests" and model_architecture.__name__ in ["FNetForQuestionAnswering"]:
            # TODO: The change in `base.py` in the PR #21132 (https://github.com/huggingface/transformers/pull/21132)
            # fails this test case. Skip for now - a fix for this along with the initial changes in PR #20426 is
            # too much. Let `ydshieh` fix it ASAP once #20426 is merged.
            to_skip = True
        elif config_class.__name__ == "LayoutLMv2Config" and test_casse_name in [
            "QAPipelineTests",
            "TextClassificationPipelineTests",
            "TokenClassificationPipelineTests",
            "ZeroShotClassificationPipelineTests",
        ]:
            # `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of a tiny
            # config. With new tiny model creation, it is available, but we need to fix the failed tests.
            to_skip = True
        elif test_casse_name == "DocumentQuestionAnsweringPipelineTests" and not tokenizer_name.endswith("Fast"):
            # This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
            to_skip = True

    return to_skip
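Each branch of the removed helper above presumably turns into an `is_pipeline_test_to_skip` override on the test class it concerns, as the hunks earlier in this diff show for several of them. Purely as an illustration (this code is not part of the diff, and the class name is made up), the `DocumentQuestionAnsweringPipelineTests` branch would map onto an override such as:

    class SomeDocumentQAModelTest:  # hypothetical test class, named only for illustration
        def is_pipeline_test_to_skip(
            self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
        ):
            if (
                pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
                and tokenizer_name is not None
                and not tokenizer_name.endswith("Fast")
            ):
                # This pipeline uses `sequence_ids()`, which is only available for fast tokenizers.
                return True
            return False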