Unverified Commit c749bd40 authored by Yih-Dar, committed by GitHub

Pipeline testing - using tiny models on Hub (#20426)



* rework pipeline tests

* run pipeline tests

* fix

* fix

* fix

* revert the changes in get_test_pipeline() parameter list

* fix expected error message

* skip a test

* clean up

---------
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent a582cfce
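
Every test-file hunk below applies the same mechanical change: `get_test_pipeline()` now takes a single `processor` argument in place of the separate `feature_extractor` / `image_processor` parameters, and each test class forwards it to whichever keyword its pipeline expects. A minimal sketch of the reworked hook, with an illustrative class name that is not part of this commit (the real classes also inherit from `unittest.TestCase` and use the `PipelineTestCaseMeta` metaclass):

```python
# Sketch only: mirrors the consolidated signature applied throughout this diff.
import numpy as np

from transformers import AudioClassificationPipeline


class ExampleAudioClassificationPipelineTests:
    def get_test_pipeline(self, model, tokenizer, processor):
        # Audio pipelines consume the shared `processor` as a feature extractor;
        # vision pipelines (see the image segmentation hunk below) pass it as
        # `image_processor` instead.
        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor)
        return audio_classifier, [np.zeros((34000,))]
```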
@@ -434,7 +434,7 @@ def create_circleci_config(folder=None):
     example_file = os.path.join(folder, "examples_test_list.txt")
     if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
         jobs.extend(EXAMPLES_TESTS)
     repo_util_file = os.path.join(folder, "test_repo_utils.txt")
     if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
         jobs.extend(REPO_UTIL_TESTS)
@@ -27,8 +27,8 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
 class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
     model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
-        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)
+    def get_test_pipeline(self, model, tokenizer, processor):
+        audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=processor)
         # test with a raw waveform
         audio = np.zeros((34000,))
@@ -60,7 +60,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
         + (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
     }
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         if tokenizer is None:
             # Side effect of no Fast Tokenizer class for these model, so skipping
             # But the slow tokenizer test should still run as they're quite small
@@ -69,7 +69,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
             # return None, None
         speech_recognizer = AutomaticSpeechRecognitionPipeline(
-            model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
+            model=model, tokenizer=tokenizer, feature_extractor=processor
         )
         # test with a raw waveform
@@ -133,7 +133,9 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=Pipel
             )
         else:
             # Non CTC models cannot use return_timestamps
-            with self.assertRaisesRegex(ValueError, "^We cannot return_timestamps yet on non-ctc models !$"):
+            with self.assertRaisesRegex(
+                ValueError, "^We cannot return_timestamps yet on non-ctc models apart from Whisper !$"
+            ):
                 outputs = speech_recognizer(audio, return_timestamps="char")
     @require_torch
@@ -53,7 +53,7 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
         else []
     )
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
         return conversation_agent, [Conversation("Hi there!")]
@@ -47,8 +47,8 @@ class DepthEstimationPipelineTests(unittest.TestCase, metaclass=PipelineTestCase
     model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
-        depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=feature_extractor)
+    def get_test_pipeline(self, model, tokenizer, processor):
+        depth_estimator = DepthEstimationPipeline(model=model, feature_extractor=processor)
         return depth_estimator, [
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
@@ -59,9 +59,9 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase, metaclass=Pipeli
     @require_pytesseract
     @require_vision
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         dqa_pipeline = pipeline(
-            "document-question-answering", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
+            "document-question-answering", model=model, tokenizer=tokenizer, feature_extractor=processor
         )
         image = INVOICE_URL
@@ -175,7 +175,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
             raise ValueError("We expect lists of floats, nothing else")
         return shape
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         if tokenizer is None:
             self.skipTest("No tokenizer")
             return
@@ -196,9 +196,7 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
             )
             return
-        feature_extractor = FeatureExtractionPipeline(
-            model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
-        )
+        feature_extractor = FeatureExtractionPipeline(model=model, tokenizer=tokenizer, feature_extractor=processor)
         return feature_extractor, ["This is a test", "This is another test"]
     def run_pipeline_test(self, feature_extractor, examples):
@@ -206,7 +206,7 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
         unmasker.tokenizer.pad_token = None
         self.run_pipeline_test(unmasker, [])
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         if tokenizer is None or tokenizer.mask_token_id is None:
             self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
@@ -49,8 +49,8 @@ class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
     model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
-        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor, top_k=2)
+    def get_test_pipeline(self, model, tokenizer, processor):
+        image_classifier = ImageClassificationPipeline(model=model, feature_extractor=processor, top_k=2)
         examples = [
             Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
             "http://images.cocodataset.org/val2017/000000039769.jpg",
@@ -81,10 +81,8 @@ class ImageSegmentationPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
         + (MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING.items() if MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING else [])
     }
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
-        image_segmenter = ImageSegmentationPipeline(
-            model=model, feature_extractor=feature_extractor, image_processor=image_processor
-        )
+    def get_test_pipeline(self, model, tokenizer, processor):
+        image_segmenter = ImageSegmentationPipeline(model=model, image_processor=processor)
         return image_segmenter, [
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
@@ -36,8 +36,8 @@ class ImageToTextPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
     model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
     tf_model_mapping = TF_MODEL_FOR_VISION_2_SEQ_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
-        pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, feature_extractor=feature_extractor)
+    def get_test_pipeline(self, model, tokenizer, processor):
+        pipe = pipeline("image-to-text", model=model, tokenizer=tokenizer, feature_extractor=processor)
         examples = [
             Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
             "./tests/fixtures/tests_samples/COCO/000000039769.png",
@@ -51,8 +51,8 @@ else:
 class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
     model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
-        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
+    def get_test_pipeline(self, model, tokenizer, processor):
+        object_detector = ObjectDetectionPipeline(model=model, feature_extractor=processor)
         return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
     def run_pipeline_test(self, object_detector, examples):
@@ -31,7 +31,7 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
     model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
     tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         if isinstance(model.config, LxmertConfig):
             # This is an bimodal model, we need to find a more consistent way
             # to switch on those models.
@@ -34,7 +34,7 @@ class SummarizationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMe
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         summarizer = SummarizationPipeline(model=model, tokenizer=tokenizer)
         return summarizer, ["(CNN)The Palestinian Authority officially became", "Some other text"]
@@ -34,7 +34,7 @@ class Text2TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTest
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         generator = Text2TextGenerationPipeline(model=model, tokenizer=tokenizer)
         return generator, ["Something to write", "Something else"]
@@ -129,7 +129,7 @@ class TextClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestC
         outputs = text_classifier("Birds are a type of animal")
         self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}])
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
         return text_classifier, ["HuggingFace is in", "This is another test"]
@@ -143,7 +143,7 @@ class TextGenerationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
             ],
         )
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         text_generator = TextGenerationPipeline(model=model, tokenizer=tokenizer)
         return text_generator, ["This is a test", "Another test"]
@@ -37,7 +37,7 @@ class TokenClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
     model_mapping = MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
     tf_model_mapping = TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         token_classifier = TokenClassificationPipeline(model=model, tokenizer=tokenizer)
         return token_classifier, ["A simple string", "A simple string that is quite a bit longer"]
@@ -34,7 +34,7 @@ class TranslationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta
     model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     tf_model_mapping = TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
-    def get_test_pipeline(self, model, tokenizer, feature_extractor, image_processor):
+    def get_test_pipeline(self, model, tokenizer, processor):
         if isinstance(model.config, MBartConfig):
             src_lang, tgt_lang = list(tokenizer.lang_code_to_id.keys())[:2]
             translator = TranslationPipeline(model=model, tokenizer=tokenizer, src_lang=src_lang, tgt_lang=tgt_lang)
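
For context on the commit title: instead of building tiny models locally, the reworked tests load small randomly-initialized checkpoints directly from the Hub. A hedged, self-contained illustration of that idea, not code from this PR; the repo id below is an assumption (tiny test checkpoints are typically hosted under the `hf-internal-testing` organization), and the exact checkpoints used by the suite are picked by the common pipeline test machinery, which is not shown in full here:

```python
# Illustration only: load a tiny randomly-initialized Hub checkpoint into a
# pipeline, as the reworked tests do. The repo id is an assumed example.
from transformers import pipeline

classifier = pipeline("text-classification", model="hf-internal-testing/tiny-random-distilbert")
print(classifier("This is a test"))
```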