"docs/source/vscode:/vscode.git/clone" did not exist on "03835af70038730808a59027d23b06629635ac5e"
Unverified Commit 32b08742 authored by Yih-Dar's avatar Yih-Dar Committed by GitHub
Browse files

`DocumentQuestionAnsweringPipeline` only for fast tokenizers (#22745)



* fix

---------
Co-authored-by: default avatarydshieh <ydshieh@users.noreply.github.com>
parent 4def2fe9
......@@ -131,6 +131,11 @@ class DocumentQuestionAnsweringPipeline(ChunkPipeline):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.tokenizer is not None and not self.tokenizer.__class__.__name__.endswith("Fast"):
raise ValueError(
"`DocumentQuestionAnsweringPipeline` requires a fast tokenizer, but a slow tokenizer "
f"(`{self.tokenizer.__class__.__name__}`) is provided."
)
if self.model.config.__class__.__name__ == "VisionEncoderDecoderConfig":
self.model_type = ModelType.VisionEncoderDecoder
......
......@@ -246,20 +246,6 @@ class LayoutLMModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase
)
fx_compatible = True
# TODO: Fix the failed tests
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
if (
pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
return True
return False
def setUp(self):
self.model_tester = LayoutLMModelTester(self)
self.config_tester = ConfigTester(self, config_class=LayoutLMConfig, hidden_size=37)
......
......@@ -295,15 +295,10 @@ class LayoutLMv2ModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCa
# `LayoutLMv2Config` was never used in pipeline tests (`test_pt_LayoutLMv2Config_XXX`) due to lack of tiny
# config. With new tiny model creation, it is available, but we need to fix the failed tests.
return True
elif (
pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# This pipeline uses `sequence_ids()` which is only available for fast tokenizers.
return True
return False
return super().is_pipeline_test_to_skip(
pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
)
def setUp(self):
self.model_tester = LayoutLMv2ModelTester(self)
......
......@@ -428,9 +428,19 @@ class PipelineTesterMixin:
def test_pipeline_zero_shot_object_detection(self):
self.run_task_tests(task="zero-shot-object-detection")
# This contains the test cases to be skipped without model architecture being involved.
def is_pipeline_test_to_skip(
self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
):
# No fix is required for this case.
if (
pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
and tokenizer_name is not None
and not tokenizer_name.endswith("Fast")
):
# `DocumentQuestionAnsweringPipelineTests` requires a fast tokenizer.
return True
return False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment