Unverified Commit 6c2ad00c authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

Move `is_pipeline_test_to_skip` to specific model test classes (#21999)



* Move `is_pipeline_test_to_skip` to specific model test classes

---------
Co-authored-by: default avatarydshieh <ydshieh@users.noreply.github.com>
parent 2beabd24
......@@ -173,6 +173,56 @@ def parse_module_content(content: str) -> List[str]:
return objects
def extract_block(content: str, indent_level: int = 0) -> str:
"""Return the first block in `content` with the indent level `indent_level`.
The first line in `content` should be indented at `indent_level` level, otherwise an error will be thrown.
This method will immediately stop the search when a (non-empty) line with indent level less than `indent_level` is
encountered.
Args:
content (`str`): The content to parse
indent_level (`int`, *optional*, default to 0): The indent level of the blocks to search for
Returns:
`str`: The first block in `content` with the indent level `indent_level`.
"""
current_object = []
lines = content.split("\n")
# Doc-styler takes everything between two triple quotes in docstrings, so we need a fake """ here to go with this.
end_markers = [")", "]", "}", '"""']
for idx, line in enumerate(lines):
if idx == 0 and indent_level > 0 and not is_empty_line(line) and find_indent(line) != indent_level:
raise ValueError(
f"When `indent_level > 0`, the first line in `content` should have indent level {indent_level}. Got "
f"{find_indent(line)} instead."
)
if find_indent(line) < indent_level and not is_empty_line(line):
break
# End of an object
is_valid_object = len(current_object) > 0
if (
not is_empty_line(line)
and not line.endswith(":")
and find_indent(line) == indent_level
and is_valid_object
):
# Closing parts should be included in current object
if line.lstrip() in end_markers:
current_object.append(line)
return "\n".join(current_object)
else:
current_object.append(line)
# Add last object
if len(current_object) > 0:
return "\n".join(current_object)
def add_content_to_text(
text: str,
content: str,
......@@ -402,12 +452,53 @@ SPECIAL_PATTERNS = {
_re_class_func = re.compile(r"^(?:class|def)\s+([^\s:\(]+)\s*(?:\(|\:)", flags=re.MULTILINE)
def remove_attributes(obj, target_attr):
"""Remove `target_attr` in `obj`."""
lines = obj.split(os.linesep)
target_idx = None
for idx, line in enumerate(lines):
# search for assignment
if line.lstrip().startswith(f"{target_attr} = "):
target_idx = idx
break
# search for function/method definition
elif line.lstrip().startswith(f"def {target_attr}("):
target_idx = idx
break
# target not found
if target_idx is None:
return obj
line = lines[target_idx]
indent_level = find_indent(line)
# forward pass to find the ending of the block (including empty lines)
parsed = extract_block("\n".join(lines[target_idx:]), indent_level)
num_lines = len(parsed.split("\n"))
for idx in range(num_lines):
lines[target_idx + idx] = None
# backward pass to find comments or decorator
for idx in range(target_idx - 1, -1, -1):
line = lines[idx]
if (line.lstrip().startswith("#") or line.lstrip().startswith("@")) and find_indent(line) == indent_level:
lines[idx] = None
else:
break
new_obj = os.linesep.join([x for x in lines if x is not None])
return new_obj
def duplicate_module(
module_file: Union[str, os.PathLike],
old_model_patterns: ModelPatterns,
new_model_patterns: ModelPatterns,
dest_file: Optional[str] = None,
add_copied_from: bool = True,
attrs_to_remove: List[str] = None,
):
"""
Create a new module from an existing one and adapting all function and classes names from old patterns to new ones.
......@@ -491,8 +582,14 @@ def duplicate_module(
new_objects.append(obj)
content = "\n".join(new_objects)
# Remove some attributes that we don't want to copy to the new file(s)
if attrs_to_remove is not None:
for attr in attrs_to_remove:
content = remove_attributes(content, target_attr=attr)
with open(dest_file, "w", encoding="utf-8") as f:
content = f.write("\n".join(new_objects))
f.write(content)
def filter_framework_files(
......@@ -1294,6 +1391,7 @@ def create_new_model_like(
new_model_patterns,
dest_file=dest_file,
add_copied_from=False,
attrs_to_remove=["pipeline_model_mapping", "is_pipeline_test_to_skip"],
)
disabled_fx_test = disabled_fx_test | disable_fx_test(dest_file)
......
......@@ -210,6 +210,18 @@ class CTRLModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
test_resize_embeddings = False
test_head_masking = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "ZeroShotClassificationPipelineTests":
# Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
# `CTRLConfig` was never used in pipeline tests, either because of a missing checkpoint or because a tiny
# config could not be created.
return True
return False
def setUp(self):
self.model_tester = CTRLModelTester(self)
self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)
......
......@@ -185,6 +185,18 @@ class TFCTRLModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
test_head_masking = False
test_onnx = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "ZeroShotClassificationPipelineTests":
# Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
# `CTRLConfig` was never used in pipeline tests, either because of a missing checkpoint or because a tiny
# config could not be created.
return True
return False
def setUp(self):
self.model_tester = TFCTRLModelTester(self)
self.config_tester = ConfigTester(self, config_class=CTRLConfig, n_embd=37)
......
......@@ -390,6 +390,26 @@ class FlaubertModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
else {}
)
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "FillMaskPipelineTests":
# Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
# `FlaubertConfig` was never used in pipeline tests: cannot create a simple tokenizer
return True
elif (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
# Flaubert has 2 QA models -> need to manually set the correct labels for one of them here
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
......
......@@ -306,6 +306,26 @@ class TFFlaubertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
test_head_masking = False
test_onnx = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "FillMaskPipelineTests":
# Get `ValueError: AttributeError: 'NoneType' object has no attribute 'new_ones'` or `AssertionError`.
# `FlaubertConfig` was never used in pipeline tests: cannot create a simple tokenizer
return True
elif (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
def setUp(self):
self.model_tester = TFFlaubertModelTester(self)
self.config_tester = ConfigTester(self, config_class=FlaubertConfig, emb_dim=37)
......
......@@ -299,6 +299,15 @@ class FNetModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
test_head_masking = False
test_pruning = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "QAPipelineTests":
return True
return False
# special case for ForPreTraining model
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
......
......@@ -385,6 +385,22 @@ class GPTJModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
test_model_parallel = False
test_head_masking = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
# special case for DoubleHeads model
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
......
......@@ -318,6 +318,22 @@ class TFGPTJModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, PipelineTester
test_missing_keys = False
test_head_masking = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
def setUp(self):
self.model_tester = TFGPTJModelTester(self)
self.config_tester = ConfigTester(self, config_class=GPTJConfig, n_embd=37)
......
......@@ -246,6 +246,20 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
)
fx_compatible = True
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
return True
return False
def setUp(self):
self.model_tester = LayoutLMModelTester(self)
self.config_tester = ConfigTester(self, config_class=LayoutLMConfig, hidden_size=37)
......
......@@ -282,6 +282,29 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
else {}
)
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name in [
"QAPipelineTests",
"TextClassificationPipelineTests",
"TokenClassificationPipelineTests",
"ZeroShotClassificationPipelineTests",
]:
# `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
# config. With new tiny model creation, it is available, but we need to fix the failed tests.
return True
elif (
pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
return True
return False
def setUp(self):
self.model_tester = LayoutLMv2ModelTester(self)
self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37)
......
......@@ -298,6 +298,12 @@ class LayoutLMv3ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
else {}
)
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
return True
def setUp(self):
self.model_tester = LayoutLMv3ModelTester(self)
self.config_tester = ConfigTester(self, config_class=LayoutLMv3Config, hidden_size=37)
......
......@@ -291,6 +291,12 @@ class TFLayoutLMv3ModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te
test_resize_embeddings = False
test_onnx = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
return True
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
inputs_dict = copy.deepcopy(inputs_dict)
......
......@@ -244,6 +244,12 @@ class LiltModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
fx_compatible = False
test_pruning = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
return True
def setUp(self):
self.model_tester = LiltModelTester(self)
self.config_tester = ConfigTester(self, config_class=LiltConfig, hidden_size=37)
......
......@@ -326,6 +326,22 @@ class LongformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
else {}
)
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
def setUp(self):
self.model_tester = LongformerModelTester(self)
self.config_tester = ConfigTester(self, config_class=LongformerConfig, hidden_size=37)
......
......@@ -299,6 +299,22 @@ class TFLongformerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Te
test_head_masking = False
test_onnx = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
def setUp(self):
self.model_tester = TFLongformerModelTester(self)
self.config_tester = ConfigTester(self, config_class=LongformerConfig, hidden_size=37)
......
......@@ -246,6 +246,17 @@ class M2M100ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
test_pruning = False
test_missing_keys = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "TranslationPipelineTests":
# Get `ValueError: Translation requires a `src_lang` and a `tgt_lang` for this model`.
# `M2M100Config` was never used in pipeline tests: cannot create a simple tokenizer.
return True
return False
def setUp(self):
self.model_tester = M2M100ModelTester(self)
self.config_tester = ConfigTester(self, config_class=M2M100Config)
......
......@@ -433,6 +433,22 @@ class MvpModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
test_pruning = False
test_missing_keys = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
def setUp(self):
self.model_tester = MvpModelTester(self)
self.config_tester = ConfigTester(self, config_class=MvpConfig)
......
......@@ -210,6 +210,18 @@ class OpenAIGPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTester
else {}
)
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "ZeroShotClassificationPipelineTests":
# Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
# `OpenAIGPTConfig` was never used in pipeline tests, either because of a missing checkpoint or because a
# tiny config could not be created.
return True
return False
# special case for DoubleHeads model
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)
......
......@@ -214,6 +214,18 @@ class TFOpenAIGPTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Tes
test_head_masking = False
test_onnx = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if pipeline_test_casse_name == "ZeroShotClassificationPipelineTests":
# Get `tokenizer does not have a padding token` error for both fast/slow tokenizers.
# `OpenAIGPTConfig` was never used in pipeline tests, either because of a missing checkpoint or because a
# tiny config could not be created.
return True
return False
def setUp(self):
self.model_tester = TFOpenAIGPTModelTester(self)
self.config_tester = ConfigTester(self, config_class=OpenAIGPTConfig, n_embd=37)
......
......@@ -207,6 +207,22 @@ class OPTModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
test_pruning = False
test_missing_keys = False
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "QAPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `QAPipelineTests` fails for a few models when the slower tokenizer are used.
# (The slower tokenizers were never used for pipeline tests before the pipeline testing rework)
# TODO: check (and possibly fix) the `QAPipelineTests` with slower tokenizer
return True
return False
def setUp(self):
self.model_tester = OPTModelTester(self)
self.config_tester = ConfigTester(self, config_class=OPTConfig)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment